Lean  $LEAN_TAG$
CoarseUniverseGeneratorProgram.cs
1 /*
2  * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
3  * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14 */
15 
20 using QuantConnect.Util;
21 using System;
22 using System.Collections.Concurrent;
23 using System.Collections.Generic;
24 using System.Globalization;
25 using System.IO;
26 using System.Linq;
27 using System.Threading;
28 using System.Threading.Tasks;
30 using DateTime = System.DateTime;
31 using Log = QuantConnect.Logging.Log;
35 
37 {
38  /// <summary>
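39  /// Generates coarse universe files (one CSV per date) from daily equity price data, map files and factor files.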
40  /// </summary>
42  {
43  /// <summary>
44  /// Has fundamental data source
45  /// </summary>
46  public const FundamentalProperty HasFundamentalSource = FundamentalProperty.CompanyReference_CompanyId;
47 
48  private readonly DirectoryInfo _dailyDataFolder;
49  private readonly DirectoryInfo _destinationFolder;
50  private readonly IMapFileProvider _mapFileProvider;
51  private readonly IFactorFileProvider _factorFileProvider;
52  private readonly string _market;
53  private readonly FileInfo _blackListedTickersFile;
54 
/// <summary>
/// Convenience entry point: builds a generator for US equities that reads daily data from
/// and writes coarse files to the standard layout under <see cref="Globals.DataFolder"/>,
/// backed by the local disk map file and factor file providers, and runs it.
/// </summary>
/// <returns>The result of <see cref="Run"/>: true if the generation completed, false if it failed</returns>
public static bool CoarseUniverseGenerator()
{
    // Both the source and the destination folders live under <data folder>/equity/usa
    var usaEquityRoot = Path.Combine(Globals.DataFolder, SecurityType.Equity.SecurityTypeToLower(), Market.USA);
    var sourceFolder = new DirectoryInfo(Path.Combine(usaEquityRoot, Resolution.Daily.ResolutionToLower()));
    var outputFolder = new DirectoryInfo(Path.Combine(usaEquityRoot, "fundamental", "coarse"));

    var blackList = new FileInfo("blacklisted-tickers.txt");
    var reservedPrefix = Config.Get("reserved-words-prefix", "quantconnect-");

    // Wire up the auxiliary data providers: factor files depend on map files, both read through the data provider
    var diskDataProvider = new DefaultDataProvider();
    var mapFiles = new LocalDiskMapFileProvider();
    mapFiles.Initialize(diskDataProvider);
    var factorFiles = new LocalDiskFactorFileProvider();
    factorFiles.Initialize(mapFiles, diskDataProvider);
    FundamentalService.Initialize(diskDataProvider, nameof(CoarseFundamentalDataProvider), false);

    var program = new CoarseUniverseGeneratorProgram(
        sourceFolder,
        outputFolder,
        Market.USA,
        blackList,
        reservedPrefix,
        mapFiles,
        factorFiles);

    // The caller of this overload only cares about success/failure, not the generated content
    return program.Run(out _, out _);
}
74 
75  /// <summary>
76  /// Initializes a new instance of the <see cref="CoarseUniverseGeneratorProgram"/> class.
77  /// </summary>
78  /// <param name="dailyDataFolder">The daily data folder.</param>
79  /// <param name="destinationFolder">The destination folder.</param>
80  /// <param name="market">The market.</param>
81  /// <param name="blackListedTickersFile">The black listed tickers file.</param>
82  /// <param name="reservedWordsPrefix">The reserved words prefix.</param>
83  /// <param name="mapFileProvider">The map file provider.</param>
84  /// <param name="factorFileProvider">The factor file provider.</param>
85  /// <param name="debugEnabled">If set to <c>true</c>, enables debug logging by setting <c>Log.DebuggingEnabled</c>.</param>
87  DirectoryInfo dailyDataFolder,
88  DirectoryInfo destinationFolder,
89  string market,
90  FileInfo blackListedTickersFile,
91  string reservedWordsPrefix,
92  IMapFileProvider mapFileProvider,
93  IFactorFileProvider factorFileProvider,
94  bool debugEnabled = false)
95  {
96  _blackListedTickersFile = blackListedTickersFile;
97  _market = market;
98  _factorFileProvider = factorFileProvider;
99  _mapFileProvider = mapFileProvider;
100  _destinationFolder = destinationFolder;
101  _dailyDataFolder = dailyDataFolder;
102 
103  Log.DebuggingEnabled = debugEnabled;
104  }
105 
/// <summary>
/// Generates the coarse universe files: loads the daily data of every security found in the map files
/// (skipping black listed tickers), builds one <see cref="CoarseFundamental"/> entry per security and
/// daily bar, and writes one CSV file per date into the destination folder.
/// </summary>
/// <param name="coarsePerSecurity">The generated coarse entries grouped by security identifier.
/// Securities for which no entry was generated are not present.</param>
/// <param name="dates">The sorted dates for which coarse content was generated.
/// Left empty if the run fails before all files were written.</param>
/// <returns>True if the run completed, false if an exception was caught (it is logged, not rethrown)</returns>
public bool Run(out ConcurrentDictionary<SecurityIdentifier, List<CoarseFundamental>> coarsePerSecurity, out DateTime[] dates)
{
    var startTime = DateTime.UtcNow;
    var success = true;
    Log.Trace($"CoarseUniverseGeneratorProgram.Run(): Processing: {_dailyDataFolder.FullName}");

    var symbolsProcessed = 0;
    var filesRead = 0;
    var dailyFilesNotFound = 0;

    var mapFileResolver = _mapFileProvider.Get(new AuxiliaryDataKey(_market, SecurityType.Equity));

    // 'out' parameters cannot be captured by the lambdas below, so they work on this local alias
    var result = coarsePerSecurity = new();
    dates = Array.Empty<DateTime>();

    var blackListedTickers = new HashSet<string>();
    if (_blackListedTickersFile.Exists)
    {
        blackListedTickers = File.ReadAllLines(_blackListedTickersFile.FullName).ToHashSet();
    }

    var securityIdentifierContexts = PopulateSidContex(mapFileResolver, blackListedTickers);

    // Lazy<T> guarantees each daily file is unzipped and parsed only once, even when several
    // workers ask for the same ticker at the same time
    var dailyPricesByTicker = new ConcurrentDictionary<string, Lazy<List<TradeBar>>>();
    var outputCoarseContent = new ConcurrentDictionary<DateTime, List<CoarseFundamental>>();

    var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = Math.Max(1, Environment.ProcessorCount / 2) };
    try
    {
        Parallel.ForEach(securityIdentifierContexts, parallelOptions, sidContext =>
        {
            var coarseForSecurity = new List<CoarseFundamental>();
            var symbol = new Symbol(sidContext.SID, sidContext.LastTicker);
            var symbolCount = Interlocked.Increment(ref symbolsProcessed);
            Log.Debug($"CoarseUniverseGeneratorProgram.Run(): Processing {symbol} with tickers: '{string.Join(",", sidContext.Tickers)}'");
            var factorFile = _factorFileProvider.Get(symbol);

            // Populate dailyPricesByTicker with all daily data by ticker for all tickers of this security.
            foreach (var ticker in sidContext.Tickers)
            {
                var dailyFile = FindDailyFile(ticker);
                if (dailyFile == null)
                {
                    Interlocked.Increment(ref dailyFilesNotFound);
                    continue;
                }

                var dailyPrices = dailyPricesByTicker.GetOrAdd(ticker, key => new Lazy<List<TradeBar>>(() =>
                {
                    var bars = ParseDailyFile(dailyFile);
                    Interlocked.Increment(ref filesRead);
                    return bars;
                }));

                // Force the load here so parsing errors surface while processing this ticker
                _ = dailyPrices.Value;
            }

            // Look for daily data for each ticker of the actual security.
            // Row i holds the last date ticker i was in use; row i-1 holds the date it started being used.
            for (int mapFileRowIndex = sidContext.MapFileRows.Length - 1; mapFileRowIndex >= 1; mapFileRowIndex--)
            {
                var ticker = sidContext.MapFileRows[mapFileRowIndex].Item2.ToLowerInvariant();
                var endDate = sidContext.MapFileRows[mapFileRowIndex].Item1;
                var startDate = sidContext.MapFileRows[mapFileRowIndex - 1].Item1;
                if (!dailyPricesByTicker.TryGetValue(ticker, out var lazyDailyData))
                {
                    Log.Error($"CoarseUniverseGeneratorProgram.Run(): Daily data for ticker {ticker.ToUpperInvariant()} not found!");
                    continue;
                }

                // Get daily data only for the period in which the security traded under this ticker
                foreach (var tradeBar in lazyDailyData.Value.Where(tb => tb.Time >= startDate && tb.Time <= endDate))
                {
                    var coarseFundamental = GenerateFactorFileRow(ticker, sidContext, factorFile as CorporateFactorProvider, tradeBar);
                    coarseForSecurity.Add(coarseFundamental);

                    // The per-date list is shared between workers: fetch (or create) it atomically,
                    // then mutate it under its own lock
                    var coarseForDate = outputCoarseContent.GetOrAdd(tradeBar.Time, key => new List<CoarseFundamental>());
                    lock (coarseForDate)
                    {
                        coarseForDate.Add(coarseFundamental);
                    }
                }
            }

            if (coarseForSecurity.Count > 0)
            {
                result[sidContext.SID] = coarseForSecurity;
            }
            if (symbolCount % 1000 == 0)
            {
                var elapsed = DateTime.UtcNow - startTime;
                Log.Trace($"CoarseUniverseGeneratorProgram.Run(): Processed {symbolCount} in {elapsed:g} at {symbolCount / elapsed.TotalMinutes:F2} symbols/minute ");
            }
        });

        var coarseFilesGenerated = WriteCoarseFiles(outputCoarseContent);

        dates = outputCoarseContent.Keys.OrderBy(x => x).ToArray();
        Log.Trace($"\n\nTotal of {coarseFilesGenerated} coarse files generated in {DateTime.UtcNow - startTime:g}:\n" +
            $"\t => {filesRead} daily data files read.\n" +
            $"\t => {dailyFilesNotFound} daily data files not found.\n");
    }
    catch (Exception e)
    {
        Log.Error(e, $"CoarseUniverseGeneratorProgram.Run(): FAILED!");
        success = false;
    }

    return success;
}

/// <summary>
/// Locates the daily data zip file of a ticker, looking first in the configured daily data folder
/// and then in the default equity/usa/daily folder under <see cref="Globals.DataFolder"/>.
/// </summary>
/// <param name="ticker">The ticker whose daily file is requested</param>
/// <returns>The existing daily file, or null if it was found in neither location</returns>
private FileInfo FindDailyFile(string ticker)
{
    var dailyFile = new FileInfo(Path.Combine(_dailyDataFolder.FullName, $"{ticker}.zip"));
    if (dailyFile.Exists)
    {
        return dailyFile;
    }

    Log.Debug($"CoarseUniverseGeneratorProgram.Run(): {dailyFile.FullName} not found, looking for daily data in data folder");

    // NOTE(review): the fallback location is hard-coded to the USA market regardless of the configured market
    dailyFile = new FileInfo(Path.Combine(Globals.DataFolder, "equity", "usa", "daily", $"{ticker}.zip"));
    if (dailyFile.Exists)
    {
        return dailyFile;
    }

    Log.Error($"CoarseUniverseGeneratorProgram.Run(): {dailyFile} not found!");
    return null;
}

/// <summary>
/// Writes one sorted CSV file per date into the destination folder, creating the folder if needed.
/// </summary>
/// <param name="coarseByDates">The coarse entries to write, keyed by date</param>
/// <returns>The number of files written</returns>
private int WriteCoarseFiles(ConcurrentDictionary<DateTime, List<CoarseFundamental>> coarseByDates)
{
    _destinationFolder.Create();
    var startWriting = DateTime.UtcNow;
    var coarseFilesGenerated = 0;

    Parallel.ForEach(coarseByDates, coarseByDate =>
    {
        var filename = $"{coarseByDate.Key.ToString(DateFormat.EightCharacter, CultureInfo.InvariantCulture)}.csv";
        var filePath = Path.Combine(_destinationFolder.FullName, filename);
        Log.Debug($"CoarseUniverseGeneratorProgram.Run(): Saving {filename} with {coarseByDate.Value.Count} entries.");
        File.WriteAllLines(filePath, coarseByDate.Value.Select(x => CoarseFundamental.ToRow(x)).OrderBy(cr => cr));
        var filesCount = Interlocked.Increment(ref coarseFilesGenerated);
        if (filesCount % 1000 == 0)
        {
            var elapsed = DateTime.UtcNow - startWriting;
            Log.Trace($"CoarseUniverseGeneratorProgram.Run(): Processed {filesCount} in {elapsed:g} at {filesCount / elapsed.TotalSeconds:F2} files/second ");
        }
    });

    return coarseFilesGenerated;
}
243 
/// <summary>
/// Builds the coarse fundamental entry of a security for a single daily bar.
/// </summary>
/// <param name="ticker">The ticker the security traded under at the time of the bar</param>
/// <param name="sidContext">The context holding the security identifier</param>
/// <param name="factorFile">The corporate factor provider of the security. May be null, in which case both factors default to 1</param>
/// <param name="tradeBar">The daily bar supplying time, close and volume</param>
/// <returns>The coarse fundamental entry for the bar's date</returns>
private static CoarseFundamental GenerateFactorFileRow(string ticker, SecurityIdentifierContext sidContext, CorporateFactorProvider factorFile, TradeBar tradeBar)
{
    var barTime = tradeBar.Time;
    var scalingFactors = factorFile?.GetScalingFactors(barTime);

    // sid,symbol,close,volume,dollar volume,has fundamental data,price factor,split factor
    var coarseRow = new CoarseFundamentalSource
    {
        Symbol = new Symbol(sidContext.SID, ticker),
        Value = tradeBar.Close.Normalize(),
        Time = barTime,
        VolumeSetter = decimal.ToInt64(tradeBar.Volume),
        // dollar volume is stored without its fractional part
        DollarVolumeSetter = Math.Truncate((double)(tradeBar.Close * tradeBar.Volume)),
        // without scaling factors fall back to neutral factors
        PriceFactorSetter = scalingFactors?.PriceFactor.Normalize() ?? 1m,
        SplitFactorSetter = scalingFactors?.SplitFactor.Normalize() ?? 1m,
        HasFundamentalDataSetter = CheckFundamentalData(barTime, sidContext.SID)
    };

    return coarseRow;
}
276 
/// <summary>
/// Checks if there is fundamental data for the given security on the given date,
/// i.e. whether the fundamental service returns a non-empty value for <see cref="HasFundamentalSource"/>.
/// </summary>
/// <param name="date">The date.</param>
/// <param name="sid">The security identifier.</param>
/// <returns>True if fundamental data is available</returns>
private static bool CheckFundamentalData(DateTime date, SecurityIdentifier sid)
{
    var sourceValue = FundamentalService.Get<string>(date, sid, HasFundamentalSource);
    if (string.IsNullOrEmpty(sourceValue))
    {
        return false;
    }

    return true;
}
287 
/// <summary>
/// Reads every bar of a zipped daily data file.
/// Each record is read as time, open, high, low, close and volume, in that order;
/// the four prices are multiplied by 1/10000 and the volume is taken as is.
/// </summary>
/// <param name="dailyFile">The zipped daily file to read</param>
/// <returns>The bars in the order they appear in the file</returns>
private static List<TradeBar> ParseDailyFile(FileInfo dailyFile)
{
    const decimal priceScale = 1 / 10000m;
    var bars = new List<TradeBar>();

    using var zipStream = dailyFile.OpenRead();
    using var reader = Compression.UnzipStreamToStreamReader(zipStream);
    while (!reader.EndOfStream)
    {
        // The reads consume the fields sequentially, so their order must match the file layout
        var time = reader.GetDateTime();
        var open = reader.GetDecimal() * priceScale;
        var high = reader.GetDecimal() * priceScale;
        var low = reader.GetDecimal() * priceScale;
        var close = reader.GetDecimal() * priceScale;
        var volume = reader.GetDecimal();

        bars.Add(new TradeBar
        {
            Time = time,
            Open = open,
            High = high,
            Low = low,
            Close = close,
            Volume = volume
        });
    }

    return bars;
}
318 
/// <summary>
/// Lazily creates a security identifier context for every map file whose last mapped symbol is not excluded.
/// Nothing is evaluated (or logged) until the returned sequence is enumerated.
/// </summary>
/// <param name="mapFileResolver">The map files to go through</param>
/// <param name="exclusions">The mapped symbols to skip</param>
/// <returns>One context per map file that was not excluded</returns>
private IEnumerable<SecurityIdentifierContext> PopulateSidContex(MapFileResolver mapFileResolver, HashSet<string> exclusions)
{
    Log.Trace("CoarseUniverseGeneratorProgram.PopulateSidContex(): Generating SID context from QuantQuote's map files.");
    foreach (var candidate in mapFileResolver)
    {
        var latestSymbol = candidate.Last().MappedSymbol;
        var isExcluded = exclusions.Contains(latestSymbol);
        if (!isExcluded)
        {
            yield return new SecurityIdentifierContext(candidate, _market);
        }
    }
}
338  }
339 }