22 using System.Collections;
23 using System.Collections.Generic;
24 using System.Globalization;
26 using System.Reflection;
// Static handles to Python objects shared by all instances: an empty Python string, the imported
// mapper module and the attributes fetched from it (column wrapper, series/data frame/index factories).
36 private static PyString _empty;
37 private static PyObject _pandas;
38 private static PyObject _pandasColumn;
39 private static PyObject _seriesFactory;
40 private static PyObject _dataFrameFactory;
41 private static PyObject _multiIndexFactory;
42 private static PyObject _multiIndex;
43 private static PyObject _indexFactory;
// Lists of index level names; one of them is passed as name(s) when an index is created.
45 private static PyList _defaultNames;
46 private static PyList _level1Names;
47 private static PyList _level2Names;
48 private static PyList _level3Names;
/// <summary>
/// Static cache of the data type members already resolved for a type, shared by all instances
/// </summary>
private static readonly Dictionary<Type, List<DataTypeMember>> _membersCache = new();

/// <summary>
/// Reflection handle of the LastPrice property looked up on <see cref="Tick"/>
/// </summary>
private static readonly MemberInfo _tickLastPriceMember = typeof(Tick).GetProperty(nameof(Tick.LastPrice));

/// <summary>
/// Reflection handle of the LastPrice property looked up on <see cref="OpenInterest"/>
/// </summary>
private static readonly MemberInfo _openInterestLastPriceMember = typeof(OpenInterest).GetProperty(nameof(Tick.LastPrice));

/// <summary>
/// Member names passed as forced inclusions when resolving the members of a type
/// (currently only <see cref="BaseData.Value"/>)
/// </summary>
private static readonly string[] _nonLeanDataTypeForcedMemberNames = new[] { nameof(BaseData.Value) };
// Names of tick members that are checked against the member name when a tick's type is not TickType.Quote.
57 private readonly
static string[] _quoteTickOnlyPropertes =
new[] {
// Shared empty collection used as the index cache key for a series whose Times is null.
68 private static readonly IReadOnlyCollection<DateTime> EmptySeriesTimesKey =
new List<DateTime>();
// Shared empty member list, assigned in the branch that derives the column names from DynamicData storage.
69 private static readonly List<DataTypeMember> EmptyDataTypeMembers =
new List<DataTypeMember>();
// Symbol of the data, taken from the base data instance or from an ISymbolProvider.
71 private readonly
Symbol _symbol;
72 private readonly
bool _isFundamentalType;
// True when a base data instance was resolved for the input data.
73 private readonly
bool _isBaseData;
// True when the time was requested as a column and the data is base data; the time is then left out of the index.
74 private readonly
bool _timeAsColumn;
// Series by column name.
75 private readonly Dictionary<string, Serie> _series;
// Members per data type as seen by this instance.
77 private readonly Dictionary<Type, List<DataTypeMember>> _members =
new();
// Import the "PandasMapper" Python module and cache the attributes used to create pandas objects.
97 _pandas = Py.Import(
"PandasMapper");
98 _pandasColumn = _pandas.GetAttr(
"PandasColumn");
99 _seriesFactory = _pandas.GetAttr(
"Series");
100 _dataFrameFactory = _pandas.GetAttr(
"DataFrame");
101 _multiIndex = _pandas.GetAttr(
"MultiIndex");
// Multi-level indexes are created through MultiIndex.from_tuples.
102 _multiIndexFactory = _multiIndex.GetAttr(
"from_tuples");
103 _indexFactory = _pandas.GetAttr(
"Index");
// Reusable empty Python string.
104 _empty =
new PyString(
string.Empty);
// Level names that appear in more than one of the name lists below.
106 var time =
new PyString(
"time");
107 var symbol =
new PyString(
"symbol");
108 var expiry =
new PyString(
"expiry");
// Default layout: expiry, strike, type, symbol, time.
109 _defaultNames =
new PyList(
new PyObject[] { expiry,
new PyString(
"strike"),
new PyString(
"type"), symbol, time });
// One, two and three level layouts.
110 _level1Names =
new PyList(
new PyObject[] { symbol });
111 _level2Names =
new PyList(
new PyObject[] { symbol, time });
112 _level3Names =
new PyList(
new PyObject[] { expiry, symbol, time });
// When no base data instance is available but the data is enumerable, its items are iterated.
126 if (baseData ==
null && data is IEnumerable enumerable)
128 foreach (var item
in enumerable)
136 var type = data.GetType();
// The data counts as base data only when a base data instance is available.
138 _isBaseData = baseData !=
null;
// Time is only kept as a column for base data.
139 _timeAsColumn = timeAsColumn && _isBaseData;
// Base data supplies the symbol directly; any other input is cast to ISymbolProvider.
140 _symbol = _isBaseData ? baseData.Symbol : ((
ISymbolProvider)data).Symbol;
143 if (baseData ==
null)
// Option symbols get their own branch.
151 else if (_symbol.SecurityType.IsOption())
// Public overload taking only the data instance.
161 public void Add(
object data)
// Private overload: adds the data to the series, forwarding overrideValues to every series write.
166 private void Add(
object data,
bool overrideValues)
// Members of the instance's type (resolving them also registers the series columns).
173 var typeMembers = GetInstanceDataTypeMembers(data);
175 var endTime =
default(DateTime);
// The end time itself is stored in the "time" series.
181 AddToSeries(
"time", endTime, endTime, overrideValues);
185 AddMembersData(data, typeMembers, endTime, overrideValues);
// Dynamic data: store its Value under "value", then each storage entry under its own key.
189 var storage = dynamicData.GetStorageDictionary();
190 var value = dynamicData.Value;
191 AddToSeries(
"value", endTime, value, overrideValues);
// The "value" entry and keys starting with "__" are excluded from the storage entries.
// NOTE(review): the prefix check is culture-sensitive (InvariantCulture); Ordinal may be intended - confirm.
193 foreach (var kvp
in storage.Where(x => x.Key !=
"value"
195 && !x.Key.StartsWith(
"__", StringComparison.InvariantCulture)))
197 AddToSeries(kvp.Key, endTime, kvp.Value, overrideValues);
// Reads one member from the instance and stores its value in the series named after the member.
202 private void AddMemberToSeries(
object instance, DateTime endTime, DataTypeMember member,
bool overrideValues)
204 var baseName = (string)
null;
// The instance is only viewed as a tick for members flagged as tick properties.
205 var tick = member.IsTickProperty ? instance as
Tick :
null;
// The last price of an open interest tick is named using the "OpenInterest" base name.
206 if (tick !=
null && member.IsTickLastPrice && tick.TickType ==
TickType.OpenInterest)
208 baseName =
"OpenInterest";
212 var key = member.GetMemberName(baseName);
213 var value = member.GetValue(instance);
215 var memberType = member.GetMemberType();
// Data dictionaries are stored through their Values property.
217 if (MemberIsDataDictionary(memberType))
219 value = memberType.GetProperty(
"Values").GetValue(value);
221 else if (member.IsProperty)
// The time dependent value is cloned with a time provider fixed at endTime.
225 value = timeDependentProperty.
Clone(
new FixedTimeProvider(endTime));
227 else if (member.IsTickProperty && tick !=
null)
// Members listed in _quoteTickOnlyPropertes are singled out when the tick is not a quote.
229 if (tick.TickType !=
TickType.Quote && _quoteTickOnlyPropertes.Contains(member.Member.Name))
233 else if (member.IsTickLastPrice)
// Last price: a null is added to the counterpart series, i.e. the "OpenInterest" named one for
// ticks that are not open interest, and the default named one for open interest ticks.
235 var nullValueKey = tick.TickType !=
TickType.OpenInterest
236 ? member.GetMemberName(
"OpenInterest")
237 : member.GetMemberName();
238 AddToSeries(nullValueKey, endTime,
null, overrideValues);
// Store the (possibly replaced) value under the member's key.
243 AddToSeries(key, endTime, value, overrideValues);
// Add the trade bar with overrideValues set to true.
255 Add(tradeBar, overrideValues:
true);
// Hold the Python GIL while Python objects are created and disposed.
266 using var _ = Py.GIL();
268 PyObject[] indexTemplate;
// Start from the default level names; the branches below pick the names and the index template.
270 var names = _defaultNames;
274 names = _level1Names;
275 indexTemplate = GetIndexTemplate(_symbol);
277 else if (levels == 2)
280 names = _level2Names;
281 indexTemplate = GetIndexTemplate(_symbol,
null);
283 else if (levels == 3)
286 names = _level3Names;
287 indexTemplate = GetIndexTemplate(_symbol.ID.Date, _symbol,
null);
293 indexTemplate = GetIndexTemplate(_symbol.ID.Date,
null,
null, _symbol,
null);
295 else if (_symbol.SecurityType.IsOption())
297 indexTemplate = GetIndexTemplate(_symbol.ID.Date, _symbol.ID.StrikePrice, _symbol.ID.OptionRight, _symbol,
null);
301 indexTemplate = GetIndexTemplate(
null,
null,
null, _symbol,
null);
// When time is a column and there is more than one name, the last name is dropped.
305 names =
new PyList(names.SkipLast(names.Count() > 1 && _timeAsColumn ? 1 : 0).ToArray());
// Dictionary of column key to pandas series, later handed to the data frame factory.
310 using var pyDict =
new PyDict();
311 foreach (var (seriesName, serie) in _series)
// Series flagged with ShouldFilter are left out when filtering is requested.
313 if (filterMissingValueColumns && serie.ShouldFilter)
continue;
// Indexes are cached by the series' times collection (or the shared empty key when Times is null).
315 var key = serie.Times ?? EmptySeriesTimesKey;
316 if (!indexCache.TryGetValue(key, out var index))
// One index source value per stored value (using DateTime.MinValue) or per stored time.
321 indexSource = serie.Values.Select(_ => CreateIndexSourceValue(DateTime.MinValue, indexTemplate)).ToPyListUnSafe();
325 indexSource = serie.Times.Select(time => CreateIndexSourceValue(time, indexTemplate)).ToPyListUnSafe();
// A single-entry template creates a named Index; otherwise a MultiIndex is created from the tuples.
328 if (indexTemplate.Length == 1)
330 using var nameDic = Py.kw(
"name", names[0]);
331 index = _indexFactory.Invoke(
new[] { indexSource }, nameDic);
335 using var namesDic = Py.kw(
"names", names);
336 index = _multiIndexFactory.Invoke(
new[] { indexSource }, namesDic);
339 indexCache[key] = index;
341 foreach (var pyObject
in indexSource)
345 indexSource.Dispose();
// Convert the stored values into a Python list.
349 using var pyvalues =
new PyList();
350 for (var i = 0; i < serie.Values.Count; i++)
352 using var pyObject = serie.Values[i].ToPython();
353 pyvalues.Append(pyObject);
// Create the series and store it under a PandasColumn key built from the series name.
355 using var series = _seriesFactory.Invoke(pyvalues, index);
356 using var pyStrKey = seriesName.ToPython();
357 using var pyKey = _pandasColumn.Invoke(pyStrKey);
358 pyDict.SetItem(pyKey, series);
361 foreach (var kvp
in indexCache)
// Release the template entries; DisposeIfNotEmpty checks each one against the shared _empty string.
366 for (var i = 0; i < indexTemplate.Length; i++)
368 DisposeIfNotEmpty(indexTemplate[i]);
// Create the data frame from the collected series.
373 var result = _dataFrameFactory.Invoke(pyDict);
375 foreach (var item
in pyDict)
// Builds one data frame out of several PandasData instances: the index is made of their symbols and
// each series contributes its first value (or None when it has no values).
// skipTimesColumn: when true, the "time" series is singled out (presumably skipped - confirm).
387 public static PyObject
ToPandasDataFrame(IEnumerable<PandasData> pandasDatas,
bool skipTimesColumn =
false)
389 using var _ = Py.GIL();
// Index with one entry per instance, named after the first level-1 name.
391 using var list = pandasDatas.Select(x => x._symbol).ToPyListUnSafe();
393 using var namesDic = Py.kw(
"name", _level1Names[0]);
394 using var index = _indexFactory.Invoke(
new[] { list }, namesDic);
// Values collected per series name, and whether each series is flagged by all instances seen so far.
396 var valuesPerSeries =
new Dictionary<string, PyList>();
397 var seriesToSkip =
new Dictionary<string, bool>();
398 foreach (var pandasData
in pandasDatas)
400 foreach (var kvp
in pandasData._series)
402 if (skipTimesColumn && kvp.Key ==
"time")
// The skip flag stays true only while every instance reports ShouldFilter for the series.
// NOTE(review): ContainsKey followed by the indexer looks the key up twice; TryGetValue would do it once.
407 if (seriesToSkip.ContainsKey(kvp.Key))
409 seriesToSkip[kvp.Key] &= kvp.Value.ShouldFilter;
413 seriesToSkip[kvp.Key] = kvp.Value.ShouldFilter;
// The value list is created the first time a series name is seen.
416 if (!valuesPerSeries.TryGetValue(kvp.Key, out PyList value))
419 value = valuesPerSeries[kvp.Key] =
new PyList();
// Append the first value of the series, or None when the series is empty.
422 if (kvp.Value.Values.Count > 0)
425 using var valueOfSymbol = kvp.Value.Values[0].ToPython();
426 value.Append(valueOfSymbol);
430 value.Append(PyObject.None);
435 using var pyDict =
new PyDict();
436 foreach (var kvp
in valuesPerSeries)
// Series whose skip flag is set are singled out here (presumably left out of the frame - confirm).
438 if (seriesToSkip.TryGetValue(kvp.Key, out var skip) && skip)
// Create the series and store it under a PandasColumn key built from the series name.
443 using var series = _seriesFactory.Invoke(kvp.Value, index);
444 using var pyStrKey = kvp.Key.ToPython();
445 using var pyKey = _pandasColumn.Invoke(pyStrKey);
446 pyDict.SetItem(pyKey, series);
450 var result = _dataFrameFactory.Invoke(pyDict);
// Calls dropna in place with axis=1 and how="all".
453 using var dropnaKwargs = Py.kw(
"axis", 1,
"inplace",
true,
"how",
"all");
454 result.GetAttr(
"dropna").Invoke(Array.Empty<PyObject>(), dropnaKwargs);
// Gets the members for the instance's type. The first time a type is seen by this instance, its
// column names are computed and a series is registered for each of them.
459 private List<DataTypeMember> GetInstanceDataTypeMembers(
object data)
461 var type = data.GetType();
462 if (!_members.TryGetValue(type, out var members))
464 HashSet<string> columnNames;
// Dynamic data: columns are the storage keys that do not start with "__", plus "value"; no members are used.
468 columnNames = (data as
DynamicData)?.GetStorageDictionary()
470 .Where(x => !x.Key.StartsWith(
"__", StringComparison.InvariantCulture)).ToHashSet(x => x.Key);
471 columnNames.Add(
"value");
472 members = EmptyDataTypeMembers;
// Other types: columns are the names reported by the type's members.
476 members = GetTypeMembers(type);
477 columnNames = members.SelectMany(x => x.GetMemberNames()).ToHashSet();
481 columnNames.Add(
"openinterest");
485 _members[type] = members;
489 columnNames.Add(
"time");
// Register a series for each column that does not have one yet; series carry a time index
// unless the time is stored as a column.
492 foreach (var columnName
in columnNames)
494 _series.TryAdd(columnName,
new Serie(withTimeIndex: !_timeAsColumn));
// Gets the members of a type: the static cache is consulted first, missing entries are computed and
// cached, and the result is also recorded in this instance's member map.
505 private List<DataTypeMember> GetTypeMembers(Type type)
507 List<DataTypeMember> typeMembers;
// On a cache miss the members are computed with either no forced member names or
// _nonLeanDataTypeForcedMemberNames.
510 if (!_membersCache.TryGetValue(type, out typeMembers))
513 ? Array.Empty<
string>()
514 : _nonLeanDataTypeForcedMemberNames;
515 typeMembers = GetDataTypeMembers(type, forcedInclusionMembers).ToList();
516 _membersCache[type] = typeMembers;
520 _members[type] = typeMembers;
// Enumerates the public instance fields and properties of a type as DataTypeMember instances,
// recursing into member types that qualify for expansion.
528 private static IEnumerable<DataTypeMember> GetDataTypeMembers(Type type,
string[] forcedInclusionMembers)
// Keep fields and properties only. Forced members are always kept; the rest are dropped when the
// member carries PandasIgnoreAttribute or its declaring type carries PandasIgnoreMembersAttribute.
531 .GetMembers(BindingFlags.Instance | BindingFlags.Public)
532 .Where(x => x.MemberType == MemberTypes.Field || x.MemberType == MemberTypes.Property)
533 .Where(x => forcedInclusionMembers.Contains(x.Name)
534 || (!x.IsDefined(PandasIgnoreAttribute) && !x.DeclaringType.IsDefined(PandasIgnoreMembersAttribute)));
539 var dataTypeMember = CreateDataTypeMember(member);
540 var memberType = dataTypeMember.GetMemberType();
// Class-typed members are given children when the type has no namespace, or when it lives in a
// "QuantConnect." namespace and neither the type nor the member is marked non-expandable.
543 if (memberType.IsClass
544 && (memberType.Namespace ==
null
547 || (memberType.Namespace.StartsWith(
"QuantConnect.", StringComparison.InvariantCulture)
548 && !memberType.IsDefined(PandasNonExpandableAttribute)
549 && !member.IsDefined(PandasNonExpandableAttribute))))
551 dataTypeMember = CreateDataTypeMember(member, GetDataTypeMembers(memberType, forcedInclusionMembers).ToArray());
554 return (memberType, dataTypeMember);
// Members are grouped by type; when more than one member of a type is to be unwrapped, SetPrefix is
// called on each of them (presumably to keep their column names distinct - confirm).
558 .GroupBy(x => x.memberType, x => x.dataTypeMember)
559 .SelectMany(grouping =>
561 var typeProperties = grouping.ToList();
562 if (typeProperties.Count > 1)
564 var propertiesToExpand = typeProperties.Where(x => x.ShouldBeUnwrapped).ToList();
565 if (propertiesToExpand.Count > 1)
567 foreach (var property
in propertiesToExpand)
569 property.SetPrefix();
574 return typeProperties;
/// <summary>
/// Walks the given members and stores their values for the instance: plain members are written
/// straight to their series, members to unwrap are expanded through their children
/// </summary>
/// <param name="instance">The object the member values are read from</param>
/// <param name="members">The members to process</param>
/// <param name="endTime">The time the values are stored at</param>
/// <param name="overrideValues">Forwarded to the series writes</param>
private void AddMembersData(object instance, IEnumerable<DataTypeMember> members, DateTime endTime, bool overrideValues)
{
    foreach (var current in members)
    {
        if (current.ShouldBeUnwrapped)
        {
            // Expand the member: its children are read from the member's own value, when it has one
            var nested = current.GetValue(instance);
            if (nested == null)
            {
                continue;
            }

            AddMembersData(nested, current.Children, endTime, overrideValues);
            continue;
        }

        AddMemberToSeries(instance, endTime, current, overrideValues);
    }
}
/// <summary>
/// Disposes the given Python object unless it is the shared empty string instance
/// </summary>
/// <param name="pyObject">The Python object to release</param>
private static void DisposeIfNotEmpty(PyObject pyObject)
{
    // The shared empty string is reused across calls and must stay alive
    if (ReferenceEquals(pyObject, _empty))
    {
        return;
    }

    pyObject.Dispose();
}
/// <summary>
/// Determines whether the type, or one of its base types, is a constructed DataDictionary
/// </summary>
/// <param name="memberType">The type to inspect</param>
/// <returns>True if a generic DataDictionary definition is found in the inheritance chain</returns>
private static bool MemberIsDataDictionary(Type memberType)
{
    // Climb the inheritance chain; the walk ends at the root or as soon as a value type is reached
    for (var current = memberType; current != null && !current.IsValueType; current = current.BaseType)
    {
        if (current.IsGenericType && current.GetGenericTypeDefinition() == typeof(DataDictionary<>))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Converts the given index entries to Python objects. Null entries are replaced by the shared
/// empty string, and the last entry is left out when time is stored as a column and there is
/// more than one entry
/// </summary>
/// <param name="args">The index entries, in level order</param>
/// <returns>The index template as an array of Python objects</returns>
private PyObject[] GetIndexTemplate(params object[] args)
{
    int trailingToDrop;
    if (args.Length > 1 && _timeAsColumn)
    {
        trailingToDrop = 1;
    }
    else
    {
        trailingToDrop = 0;
    }

    return args
        .SkipLast(trailingToDrop)
        .Select(entry => entry?.ToPython() ?? _empty)
        .ToArray();
}
// Creates the Python index value for one row from the index template.
634 private PyObject CreateIndexSourceValue(DateTime index, PyObject[] list)
// With time kept in the index and a multi-entry template, the last slot is replaced by the given
// time; the previous entry is released first (unless it is the shared empty string).
636 if (!_timeAsColumn && list.Length > 1)
638 DisposeIfNotEmpty(list[^1]);
639 list[^1] = index.ToPython();
// The tuple is built from a copy of the template array.
644 return new PyTuple(list.ToArray());
// Otherwise the first entry of the template is returned.
647 return list[0].ToPython();
/// <summary>
/// Adds a value to the series registered under the given key
/// </summary>
/// <param name="key">The name of the series</param>
/// <param name="time">The time of the value</param>
/// <param name="input">The value to add</param>
/// <param name="overrideValues">Forwarded to the series</param>
/// <exception cref="ArgumentException">Thrown when no series is registered for the key</exception>
private void AddToSeries(string key, DateTime time, object input, bool overrideValues)
{
    if (_series.TryGetValue(key, out var target))
    {
        target.Add(time, input, overrideValues);
        return;
    }

    throw new ArgumentException($"PandasData.AddToSeries(): {Messages.PandasData.KeyNotFoundInSeries(key)}");
}
// Format provider used when decimals are converted to double.
668 private static readonly IFormatProvider InvariantCulture = CultureInfo.InvariantCulture;
// Flag read by the data frame builders to leave the series out; Add sets it to false once a
// value carrying information is added.
670 public bool ShouldFilter {
get;
private set; }
// Times of the values; callers guard against null (presumably null without a time index - confirm).
671 public List<DateTime> Times {
get; }
// The stored values.
672 public List<object> Values {
get; }
// withTimeIndex defaults to true.
674 public Serie(
bool withTimeIndex =
true)
// Adds a value for the given time and updates ShouldFilter from the value's content.
684 public void Add(DateTime time,
object input,
bool overrideValues)
// Decimals are converted to double using the invariant culture; other inputs are kept as they are.
686 var value = input is decimal ? Convert.ToDouble(input, InvariantCulture) : input;
// A double for which IsNaNOrZero() is false clears ShouldFilter.
690 if (value is
double doubleValue)
692 if (!doubleValue.IsNaNOrZero())
694 ShouldFilter =
false;
// So does a string that is not null, empty or whitespace.
697 else if (value is
string stringValue)
699 if (!
string.IsNullOrWhiteSpace(stringValue))
701 ShouldFilter =
false;
704 else if (value is
bool boolValue)
708 ShouldFilter =
false;
// Remaining non-null values: a collection clears the flag when it has at least one item.
711 else if (value !=
null)
713 if (value is ICollection enumerable)
715 if (enumerable.Count != 0)
717 ShouldFilter =
false;
722 ShouldFilter =
false;
// Overriding applies only when requested and the most recent stored time equals the new time.
727 if (overrideValues && Times !=
null && Times.Count > 0 && Times[^1] == time)
// Time provider whose GetUtcNow() returns the stored _time field.
740 private class FixedTimeProvider : ITimeProvider
742 private readonly DateTime _time;
743 public DateTime GetUtcNow() => _time;
744 public FixedTimeProvider(DateTime time)