22 using System.Collections;
23 using System.Collections.Generic;
28 public partial class PandasConverter
33 private class DataFrameGenerator
/// <summary>
/// Names handed to ConcatDataFrames alongside (symbol, time) keys when the stored
/// collections span more than one symbol and none of those symbols is canonical.
/// Presumably these become index level names of the result; confirm against ConcatDataFrames.
/// </summary>
35 private static readonly
string[] MultiBaseDataCollectionDataFrameNames =
new[] {
"collection_symbol",
"time" };
/// <summary>
/// Names handed to ConcatDataFrames alongside (symbol, time) keys when the stored
/// collections span more than one symbol and at least one of them is canonical.
/// </summary>
36 private static readonly
string[] MultiCanonicalSymbolsDataFrameNames =
new[] {
"canonical",
"time" };
/// <summary>
/// Names handed to ConcatDataFrames when the frames are keyed by time only.
/// </summary>
37 private static readonly
string[] SingleBaseDataCollectionDataFrameNames =
new[] {
"time" };
// Requested data type filter; AddData compares the runtime type of each data point against it.
39 private readonly Type _dataType;
// True when no type filter was given, or the filter is Tick or OpenInterest (set in the constructor).
40 private readonly
bool _requestedTick;
// True when no type filter was given, or the filter is QuoteBar (set in the constructor).
41 private readonly
bool _requestedQuoteBar;
// True when no type filter was given, or the filter is TradeBar (set in the constructor).
42 private readonly
bool _requestedTradeBar;
// Forwarded to every PandasData instance this generator creates.
43 private readonly
bool _timeAsColumn;
// Per-symbol accumulators; created lazily on first use, so it may be null.
48 private Dictionary<Symbol, PandasData> _pandasData;
// Collections registered through AddCollection as (symbol, time, items); created lazily, so it may be null.
49 private List<(Symbol Symbol, DateTime Time, IEnumerable<ISymbolProvider>
Data)> _collections;
// Largest PandasData.Levels seen so far; used as the default level count when generating frames.
51 private int _maxLevels;
// Set by the generic AddData once more than one symbol has been accumulated.
52 private bool _shouldUseSymbolOnlyIndex;
// Gates the collection handling in both AddData overloads.
// NOTE(review): its assignment is not visible in this listing; confirm the constructor sets it.
53 private readonly
bool _flatten;
/// <summary>
/// Base constructor: derives which data kinds were requested from <paramref name="dataType"/>
/// and stores the time-as-column option.
/// </summary>
/// <param name="dataType">Requested data type; null means ticks, trade bars and quote bars are all requested</param>
/// <param name="timeAsColumn">Stored in _timeAsColumn and later forwarded to PandasData</param>
/// <param name="flatten">Flatten option; NOTE(review): its assignment to _flatten is not visible in this listing</param>
55 protected DataFrameGenerator(Type dataType =
null,
bool timeAsColumn =
false,
bool flatten =
false)
// NOTE(review): the assignment of _dataType is not visible in this listing; confirm it is set here.
// Ticks are requested when there is no filter, or the filter is Tick or OpenInterest.
59 _requestedTick = dataType ==
null || dataType == typeof(
Tick) || dataType == typeof(
OpenInterest);
// Trade bars are requested when there is no filter, or the filter is TradeBar.
60 _requestedTradeBar = dataType ==
null || dataType == typeof(
TradeBar);
// Quote bars are requested when there is no filter, or the filter is QuoteBar.
61 _requestedQuoteBar = dataType ==
null || dataType == typeof(
QuoteBar);
62 _timeAsColumn = timeAsColumn;
/// <summary>
/// Creates a generator for a sequence of slices, delegating option handling to the base constructor
/// (timeAsColumn is left at its default).
/// </summary>
/// <param name="slices">The slices to convert</param>
/// <param name="flatten">Forwarded to the base constructor</param>
/// <param name="dataType">Optional data type filter, forwarded to the base constructor</param>
// NOTE(review): the constructor body is not visible in this listing; presumably it feeds the slices to AddData.
66 public DataFrameGenerator(IEnumerable<Slice> slices,
bool flatten =
false, Type dataType =
null)
67 : this(dataType, flatten: flatten)
/// <summary>
/// Walks every data point of every slice and routes it either to the flattened collections list
/// or to the per-symbol PandasData accumulator.
/// </summary>
/// <param name="slices">The slices whose data should be added</param>
// NOTE(review): braces and some statements are missing from this listing, so the exact
// branch nesting below cannot be confirmed from here.
77 protected void AddData(IEnumerable<Slice> slices)
// Tracks security identifiers already handled when both trade and quote bars are requested.
// NOTE(review): starts as null and its creation is not visible in this listing; confirm it is
// instantiated before the addedData.Add call below.
79 HashSet<SecurityIdentifier> addedData =
null;
81 foreach (var slice
in slices)
83 foreach (var data
in slice.AllData)
// When flattening, collection-typed data is registered as a collection keyed by its symbol and end time.
85 if (_flatten && IsCollection(data.GetType()))
87 AddCollection(data.Symbol, data.EndTime, (data as IEnumerable).Cast<
ISymbolProvider>());
91 var pandasData = GetPandasData(data);
// Custom data, and ticks when ticks were requested (the branch body is not visible here).
92 if (pandasData.IsCustomData || (_requestedTick && data is
Tick))
// Neither bar kind was requested, but a type filter exists and this data point matches it.
98 if (!_requestedTradeBar && !_requestedQuoteBar && _dataType !=
null && data.GetType().IsAssignableTo(_dataType))
101 pandasData.Add(data);
// With both bar kinds requested, HashSet.Add returns false for a security identifier that was already recorded.
106 if (_requestedTradeBar && _requestedQuoteBar)
109 if (!addedData.Add(data.Symbol.ID))
// Treat the data point as a trade bar if trade bars were requested, then look up the quote bar
// for the same symbol in the slice.
117 var tradeBar = _requestedTradeBar ? data as
TradeBar :
null;
118 if (tradeBar !=
null)
120 slice.QuoteBars.TryGetValue(tradeBar.Symbol, out quoteBar);
// Otherwise treat it as a quote bar if quote bars were requested, and look up the trade bar
// for the same symbol in the slice.
124 quoteBar = _requestedQuoteBar ? data as
QuoteBar :
null;
125 if (quoteBar !=
null)
127 slice.Bars.TryGetValue(quoteBar.
Symbol, out tradeBar);
// Both bars are added together; either may be null at this point.
130 pandasData.Add(tradeBar, quoteBar);
/// <summary>
/// Adds a sequence of data points. When flattening and <typeparamref name="T"/> is a collection type,
/// each item is registered as a collection; otherwise each item goes to its symbol's PandasData.
/// </summary>
/// <typeparam name="T">Item type; item.Symbol is read below, so a constraint is presumably declared on a line missing from this listing</typeparam>
/// <param name="data">The data points to add</param>
143 protected void AddData<T>(IEnumerable<T> data)
146 var type = typeof(T);
147 var isCollection = IsCollection(type);
149 if (_flatten && isCollection)
151 foreach (var collection
in data)
// Each item is viewed both as BaseData (for symbol and end time) and as an enumerable of its contents.
153 var baseData = collection as
BaseData;
154 var collectionData = collection as IEnumerable;
155 AddCollection(baseData.Symbol, baseData.EndTime, collectionData.Cast<
ISymbolProvider>());
// Remember the previous item's symbol and accumulator so consecutive items with the same
// symbol reuse the accumulator instead of calling GetPandasData again.
160 Symbol prevSymbol =
null;
161 PandasData prevPandasData =
null;
162 foreach (var item
in data)
164 var pandasData = prevSymbol !=
null && item.Symbol == prevSymbol ? prevPandasData : GetPandasData(item);
165 pandasData.Add(item);
166 prevSymbol = item.Symbol;
167 prevPandasData = pandasData;
// More than one accumulated symbol switches the generator to the symbol-only index.
171 if (_pandasData !=
null && _pandasData.Count > 1)
173 _shouldUseSymbolOnlyIndex =
true;
/// <summary>
/// Builds the per-symbol and per-collection data frames and concatenates them into one Python object.
/// </summary>
/// <param name="levels">Level count forwarded to GetPandasDataDataFrames; when null, _maxLevels is used there</param>
/// <param name="sort">Forwarded to ConcatDataFrames</param>
/// <param name="filterMissingValueColumns">Forwarded to GetPandasDataDataFrames</param>
/// <param name="symbolOnlyIndex">Forwarded to both frame-producing helpers</param>
/// <param name="forceMultiValueSymbol">Forwarded to both frame-producing helpers</param>
/// <returns>The result of ConcatDataFrames over the generated frames</returns>
// NOTE(review): braces and some statements are missing from this listing, so the conditions
// guarding some of the returns below cannot be confirmed from here.
189 public PyObject GenerateDataFrame(
int? levels =
null,
bool sort =
true,
bool filterMissingValueColumns =
true,
190 bool symbolOnlyIndex =
false,
bool forceMultiValueSymbol =
false)
// Hold the Python GIL until the end of the enclosing scope.
192 using var _ = Py.GIL();
// Both helpers are lazy iterators; materialize them once because the lists are used several times below.
194 var pandasDataDataFrames = GetPandasDataDataFrames(levels, filterMissingValueColumns, symbolOnlyIndex, forceMultiValueSymbol).ToList();
195 var collectionsDataFrames = GetCollectionsDataFrames(symbolOnlyIndex, forceMultiValueSymbol).ToList();
// No collections: concatenate the per-symbol frames only.
199 if (collectionsDataFrames.Count == 0)
201 return ConcatDataFrames(pandasDataDataFrames, sort, dropna:
true);
// Collection frames first, then the per-symbol frames.
204 var dataFrames = collectionsDataFrames.Select(x => x.Item3).Concat(pandasDataDataFrames);
// NOTE(review): the condition guarding this return is not visible in this listing.
208 return ConcatDataFrames(dataFrames, sort, dropna:
true);
// Collections span more than one distinct symbol: key each collection frame by (symbol, time).
210 else if (_collections.DistinctBy(x => x.Symbol).Count() > 1)
212 var keys = collectionsDataFrames
213 .Select(x =>
new object[] { x.Item1, x.Item2 })
// NOTE(review): the key for a per-symbol frame uses the frame object itself as its first element; confirm this is intended.
214 .Concat(pandasDataDataFrames.Select(x =>
new object[] { x, DateTime.MinValue }));
215 var names = _collections.Any(x => x.Symbol.IsCanonical())
216 ? MultiCanonicalSymbolsDataFrameNames
217 : MultiBaseDataCollectionDataFrameNames;
219 return ConcatDataFrames(dataFrames, keys, names, sort, dropna:
true);
// Remaining case: key each collection frame by time only; per-symbol frames get DateTime.MinValue.
223 var keys = collectionsDataFrames
224 .Select(x =>
new object[] { x.Item2 })
225 .Concat(pandasDataDataFrames.Select(x =>
new object[] { DateTime.MinValue }));
227 return ConcatDataFrames(dataFrames, keys, SingleBaseDataCollectionDataFrameNames, sort, dropna:
true);
// NOTE(review): the body of this loop is not visible in this listing; presumably it cleans up the intermediate frames.
232 foreach (var df
in pandasDataDataFrames.Concat(collectionsDataFrames.Select(x => x.Item3)))
/// <summary>
/// Lazily yields the data frames built from the per-symbol PandasData accumulators.
/// </summary>
/// <param name="levels">Level count for each frame; when null, _maxLevels is used</param>
/// <param name="filterMissingValueColumns">Forwarded to PandasData.ToPandasDataFrame</param>
/// <param name="symbolOnlyIndex">Requests the single combined frame built without the times column</param>
/// <param name="forceMultiValueSymbol">When true, the combined-frame path is never taken</param>
242 private IEnumerable<PyObject> GetPandasDataDataFrames(
int? levels,
bool filterMissingValueColumns,
bool symbolOnlyIndex,
bool forceMultiValueSymbol)
// Nothing accumulated (the branch body is not visible here; presumably it ends the iteration).
244 if (_pandasData is
null || _pandasData.Count == 0)
// Combined frame: one frame for all symbols, requested explicitly or implied by AddData having seen several symbols.
249 if (!forceMultiValueSymbol && (symbolOnlyIndex || _shouldUseSymbolOnlyIndex))
251 yield
return PandasData.ToPandasDataFrame(_pandasData.Values, skipTimesColumn:
true);
// One frame per symbol.
// NOTE(review): whether this loop is an else-branch of the check above is not visible in this listing.
255 foreach (var data
in _pandasData.Values)
257 yield
return data.ToPandasDataFrame(levels ?? _maxLevels, filterMissingValueColumns);
/// <summary>
/// Lazily yields one (symbol, time, data frame) tuple per stored collection, building each frame
/// with a nested generator.
/// </summary>
/// <param name="symbolOnlyIndex">Forwarded to the nested generator; its negation becomes the nested timeAsColumn option</param>
/// <param name="forceMultiValueSymbol">Forwarded to the nested generator</param>
264 private IEnumerable<(Symbol, DateTime, PyObject)> GetCollectionsDataFrames(
bool symbolOnlyIndex,
bool forceMultiValueSymbol)
// Nothing registered (the branch body is not visible here; presumably it ends the iteration).
266 if (_collections is
null || _collections.Count == 0)
// GroupBy followed by SelectMany re-flattens the list so that entries sharing a symbol are adjacent.
271 foreach (var (symbol, time, data) in _collections.GroupBy(x => x.Symbol).SelectMany(x => x))
// A fresh generator per collection, reusing this generator's type filter and flatten option.
273 var generator =
new DataFrameGenerator(_dataType, timeAsColumn: !symbolOnlyIndex, flatten: _flatten);
274 generator.AddData(data);
275 var dataFrame = generator.GenerateDataFrame(symbolOnlyIndex: symbolOnlyIndex, forceMultiValueSymbol: forceMultiValueSymbol);
277 yield
return (symbol, time, dataFrame);
// NOTE(review): the method header and any return statement are missing from this listing. The body
// matches the GetPandasData(...) calls in the AddData overloads: get or create the PandasData for a symbol.
// Create the dictionary on first use.
283 _pandasData ??=
new();
284 if (!_pandasData.TryGetValue(data.
Symbol, out var pandasData))
// First data point for this symbol: create its accumulator and register it.
286 pandasData =
new PandasData(data, _timeAsColumn);
287 _pandasData[data.
Symbol] = pandasData;
// Track the largest level count across accumulators.
288 _maxLevels = Math.Max(_maxLevels, pandasData.Levels);
/// <summary>
/// Registers a collection of data points under the given symbol and time.
/// </summary>
/// <param name="symbol">Symbol the collection belongs to</param>
/// <param name="time">Time associated with the collection (callers pass the data's EndTime)</param>
/// <param name="data">The items contained in the collection; stored as given, not enumerated here</param>
294 private void AddCollection(Symbol symbol, DateTime time, IEnumerable<ISymbolProvider> data)
// Create the list on first use.
296 _collections ??=
new();
297 _collections.Add((symbol, time, data));
/// <summary>
/// Tells whether a type should be handled as a collection: it must be assignable to BaseData and
/// implement a generic interface whose generic type definition is assignable to the open generic IEnumerable.
/// </summary>
/// <param name="type">The type to inspect</param>
// NOTE(review): the predicate continues after the trailing "&&" on lines missing from this listing,
// so further conditions apply that cannot be documented from here.
307 private static bool IsCollection(Type type)
309 return type.IsAssignableTo(typeof(
BaseData)) &&
310 type.GetInterfaces().Any(x => x.IsGenericType &&
311 x.GetGenericTypeDefinition().IsAssignableTo(typeof(IEnumerable<>)) &&
/// <summary>
/// Generic variant of the data frame generator for a sequence of items of type <typeparamref name="T"/>.
/// </summary>
316 private class DataFrameGenerator<T> : DataFrameGenerator
// Forwards the flatten option to the base constructor; data type and timeAsColumn keep their defaults.
// NOTE(review): the constructor body is not visible in this listing; presumably it adds the data.
319 public DataFrameGenerator(IEnumerable<T> data,
bool flatten)
320 : base(flatten: flatten)