Lean  $LEAN_TAG$
PandasConverter.cs
1 /*
2  * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
3  * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14 */
15 
16 using Python.Runtime;
17 using QuantConnect.Data;
19 using QuantConnect.Util;
20 using System;
21 using System.Collections;
22 using System.Collections.Generic;
23 using System.Linq;
24 
25 namespace QuantConnect.Python
26 {
27  /// <summary>
    /// Collection of methods that convert lists of objects into pandas.DataFrame instances
29  /// </summary>
30  public partial class PandasConverter
31  {
32  private static dynamic _pandas;
33  private static PyObject _concat;
34 
/// <summary>
/// Initializes the <see cref="PandasConverter"/> class by importing the pandas module
/// and caching a reference to its <c>concat</c> attribute
/// </summary>
static PandasConverter()
{
    using var gil = Py.GIL();

    var pandasModule = Py.Import("pandas");
    _pandas = pandasModule;

    // Looked up once here so it does not have to be requested on every concatenation
    _concat = pandasModule.GetAttr("concat");
}
48 
/// <summary>
/// Builds a pandas.DataFrame out of a sequence of <see cref="Slice"/> objects
/// </summary>
/// <param name="data">The slices to convert</param>
/// <param name="flatten">Whether to flatten collections into rows and columns</param>
/// <param name="dataType">Optional type of bars to add to the data frame</param>
/// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
public PyObject GetDataFrame(IEnumerable<Slice> data, bool flatten = false, Type dataType = null)
    => new DataFrameGenerator(data, flatten, dataType).GenerateDataFrame();
62 
/// <summary>
/// Builds a pandas.DataFrame out of a sequence of items that expose a symbol
/// </summary>
/// <typeparam name="T">Type of the items to convert, which must implement <see cref="ISymbolProvider"/></typeparam>
/// <param name="data">The items to convert</param>
/// <param name="symbolOnlyIndex">Whether to make the index only the symbol, without time or any other index levels</param>
/// <param name="forceMultiValueSymbol">Useful when the data contains points for multiple symbols.
/// If false and <paramref name="symbolOnlyIndex"/> is true, it will assume there is a single point for each symbol,
/// and will apply performance improvements for the data frame generation.</param>
/// <param name="flatten">Whether to flatten collections into rows and columns</param>
/// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
/// <remarks>Helper method for testing</remarks>
public PyObject GetDataFrame<T>(IEnumerable<T> data, bool symbolOnlyIndex = false, bool forceMultiValueSymbol = false, bool flatten = false)
    where T : ISymbolProvider
{
    // Use 2 instead of maxLevels for backwards compatibility
    var indexLevels = symbolOnlyIndex ? 1 : 2;

    return new DataFrameGenerator<T>(data, flatten).GenerateDataFrame(
        levels: indexLevels,
        sort: false,
        symbolOnlyIndex: symbolOnlyIndex,
        forceMultiValueSymbol: forceMultiValueSymbol);
}
85 
/// <summary>
/// Builds a pandas.DataFrame with one column per entry of the given collection,
/// each column holding the values of the entry's <see cref="IndicatorDataPoint"/> list
/// </summary>
/// <param name="data">Pairs of column name and list of <see cref="IndicatorDataPoint"/></param>
/// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
public PyObject GetIndicatorDataFrame(IEnumerable<KeyValuePair<string, List<IndicatorDataPoint>>> data)
{
    using var gil = Py.GIL();
    using var seriesByName = new PyDict();

    foreach (var (name, points) in data)
    {
        AddSeriesToPyDict(name, points, seriesByName);
    }

    return MakeIndicatorDataFrame(seriesByName);
}
105 
/// <summary>
/// Builds a pandas.DataFrame from a Python dictionary of string to list of <see cref="IndicatorDataPoint"/>
/// </summary>
/// <param name="data"><see cref="PyObject"/> that should be a dictionary (convertible to PyDict) of string to list of <see cref="IndicatorDataPoint"/></param>
/// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
/// <exception cref="ArgumentException">Thrown, wrapping the original exception, when the conversion fails at any point</exception>
public PyObject GetIndicatorDataFrame(PyObject data)
{
    using var gil = Py.GIL();
    using var dataPythonType = data.GetPythonType();

    // Descriptions used in the error message; they get more specific as the conversion advances
    var sourceDescription = dataPythonType.ToString();
    var targetDescription = nameof(PyDict);
    PyObject lastItem = null;

    try
    {
        using var sourceDict = new PyDict(data);
        using var seriesByName = new PyDict();

        // The input could be wrapped as a PyDict, so any later failure is about its keys and values
        targetDescription = $"{nameof(String)}: {nameof(List<IndicatorDataPoint>)}";

        foreach (var item in sourceDict.Items())
        {
            // Remembered so the catch block can report the types of the offending pair
            lastItem = item;

            var name = item[0].As<string>();
            var points = item[1].As<List<IndicatorDataPoint>>();
            AddSeriesToPyDict(name, points, seriesByName);
        }

        return MakeIndicatorDataFrame(seriesByName);
    }
    catch (Exception e)
    {
        if (lastItem != null)
        {
            sourceDescription = $"{lastItem[0].GetPythonType()}: {lastItem[1].GetPythonType()}";
        }

        throw new ArgumentException(Messages.PandasConverter.ConvertToDictionaryFailed(sourceDescription, targetDescription, e.Message), e);
    }
}
146 
/// <summary>
/// Returns a string that represents the current object
/// </summary>
/// <returns>
/// The result of calling <c>Repr()</c> on the imported pandas module,
/// or a message stating that the module was not imported when it is not available
/// </returns>
public override string ToString()
{
    // Return early: without this the null module would be dereferenced below
    if (_pandas == null)
    {
        return "pandas module was not imported.";
    }

    using (Py.GIL())
    {
        return _pandas.Repr();
    }
}
163 
/// <summary>
/// Concatenates multiple data frames
/// </summary>
/// <typeparam name="T">Type of the keys used for the new index level</typeparam>
/// <param name="dataFrames">The data frames to concatenate</param>
/// <param name="keys">
/// Optional new keys for a new multi-index level that would be added
/// to index each individual data frame in the resulting one.
/// Only used when <paramref name="names"/> is provided as well.
/// </param>
/// <param name="names">
/// The optional names of the new index level (and the existing ones if they need to be changed).
/// Only used when <paramref name="keys"/> is provided as well.
/// </param>
/// <param name="sort">Whether to sort the resulting data frame</param>
/// <param name="dropna">Whether to drop columns containing NA values only (Nan, None, etc)</param>
/// <returns>A new data frame result from concatenating the input, or an empty data frame if there is nothing to concatenate</returns>
public static PyObject ConcatDataFrames<T>(IEnumerable<PyObject> dataFrames, IEnumerable<T> keys, IEnumerable<string> names,
    bool sort = true, bool dropna = true)
{
    using (Py.GIL())
    {
        using var pyDataFrames = dataFrames.ToPyListUnSafe();

        if (pyDataFrames.Length() == 0)
        {
            return _pandas.DataFrame();
        }

        using var kwargs = Py.kw("sort", sort);
        PyList pyKeys = null;
        PyList pyNames = null;

        try
        {
            // Keys and names are only forwarded as a pair; copy=False is passed along with them
            if (keys != null && names != null)
            {
                pyNames = names.ToPyListUnSafe();
                pyKeys = ConvertConcatKeys(keys);
                using var pyFalse = false.ToPython();

                kwargs.SetItem("keys", pyKeys);
                kwargs.SetItem("names", pyNames);
                kwargs.SetItem("copy", pyFalse);
            }

            var result = _concat.Invoke(new[] { pyDataFrames }, kwargs);

            // Drop columns with only NaN or None values
            if (dropna)
            {
                try
                {
                    using var dropnaKwargs = Py.kw("axis", 1, "inplace", true, "how", "all");
                    // Both the bound method and the value it returns are temporaries: release them right away
                    using var dropnaMethod = result.GetAttr("dropna");
                    using var dropnaResult = dropnaMethod.Invoke(Array.Empty<PyObject>(), dropnaKwargs);
                }
                catch
                {
                    // The caller never gets the data frame in this case, so it has to be released here
                    result.Dispose();
                    throw;
                }
            }

            return result;
        }
        finally
        {
            pyKeys?.Dispose();
            pyNames?.Dispose();
        }
    }
}
223 
/// <summary>
/// Concatenates multiple data frames without providing keys or names for a new index level
/// </summary>
/// <param name="dataFrames">The data frames to concatenate</param>
/// <param name="sort">Whether to sort the resulting data frame</param>
/// <param name="dropna">Whether to drop columns containing NA values only (Nan, None, etc)</param>
/// <returns>A new data frame result from concatenating the input</returns>
public static PyObject ConcatDataFrames(IEnumerable<PyObject> dataFrames, bool sort = true, bool dropna = true)
    => ConcatDataFrames<string>(dataFrames, keys: null, names: null, sort: sort, dropna: dropna);
228 
/// <summary>
/// Creates the list of keys required for the pd.concat method, converting each enumerable key
/// to a Python tuple so that its items are used as levels for a multi index
/// </summary>
/// <param name="keys">The keys, each one being the sequence of items that make up a tuple</param>
/// <returns>A Python list holding one tuple per key</returns>
private static PyList ConvertConcatKeys(IEnumerable<IEnumerable<object>> keys)
{
    // Every Python object created here is tracked so that these exact instances are released afterwards.
    // Enumerating a lazy query a second time for clean up would create (and dispose) brand new objects,
    // leaving the ones actually used for the list undisposed.
    var createdObjects = new List<PyObject>();
    try
    {
        var keyTuples = new List<PyTuple>();
        foreach (var key in keys)
        {
            var tupleItems = new List<PyObject>();
            foreach (var item in key)
            {
                var pyItem = item.ToPython();
                createdObjects.Add(pyItem);
                tupleItems.Add(pyItem);
            }

            var keyTuple = new PyTuple(tupleItems.ToArray());
            createdObjects.Add(keyTuple);
            keyTuples.Add(keyTuple);
        }

        return keyTuples.ToPyListUnSafe();
    }
    finally
    {
        foreach (var pyObject in createdObjects)
        {
            pyObject.DisposeSafely();
        }
    }
}
252 
/// <summary>
/// Creates the list of keys required for the pd.concat method.
/// When <typeparamref name="T"/> is an enumerable type other than string, each key is converted
/// to a Python tuple; otherwise the keys are put in the list as they are.
/// </summary>
/// <typeparam name="T">Type of the keys</typeparam>
/// <param name="keys">The keys to convert</param>
/// <returns>A Python list with the converted keys</returns>
private static PyList ConvertConcatKeys<T>(IEnumerable<T> keys)
{
    var keysAreEnumerables = typeof(T).IsAssignableTo(typeof(IEnumerable)) && !typeof(T).IsAssignableTo(typeof(string));
    if (keysAreEnumerables)
    {
        // Go through the non-generic IEnumerable and box each item: casting the keys directly to
        // IEnumerable<object> fails for sequences of value types (e.g. int[] or List<DateTime>),
        // since generic covariance does not apply to them.
        return ConvertConcatKeys(keys.Cast<IEnumerable>().Select(key => key.Cast<object>()));
    }

    return keys.ToPyListUnSafe();
}
262 
/// <summary>
/// Builds a pandas series out of a list of <see cref="IndicatorDataPoint"/>, using the end time of each point
/// as index and its value (as double) as data, and stores it in the <see cref="PyDict"/>
/// under the lower case version of the given <paramref name="key"/>
/// </summary>
/// <param name="key">Key to insert in the <see cref="PyDict"/></param>
/// <param name="points">List of <see cref="IndicatorDataPoint"/> that will make up the resulting series</param>
/// <param name="pyDict"><see cref="PyDict"/> where the resulting key-value pair will be inserted into</param>
private void AddSeriesToPyDict(string key, List<IndicatorDataPoint> points, PyDict pyDict)
{
    var endTimes = new List<DateTime>();
    var observations = new List<double>();

    foreach (var dataPoint in points)
    {
        endTimes.Add(dataPoint.EndTime);
        observations.Add((double)dataPoint.Value);
    }

    var columnName = key.ToLowerInvariant();
    var series = _pandas.Series(observations, endTimes);
    pyDict.SetItem(columnName, series);
}
282 
/// <summary>
/// Creates a pandas.DataFrame from a <see cref="PyDict"/> of string to pandas.Series,
/// passing the lower cased and sorted dictionary keys as the data frame columns
/// </summary>
/// <param name="pyDict"><see cref="PyDict"/> of string to pandas.Series</param>
/// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
private PyObject MakeIndicatorDataFrame(PyDict pyDict)
{
    var orderedColumns = pyDict.Keys()
        .Select(pyKey => pyKey.As<string>().ToLowerInvariant())
        .OrderBy(columnName => columnName);

    return _pandas.DataFrame(pyDict, columns: orderedColumns);
}
292  }
293 }