numpy
- np.full([3,4], 0.) 创建的是 float64 数组,可以直接通过切片赋值写入小数;而 np.full([3,4], 0) 创建的是整型数组,通过切片写入小数时小数部分会被截断(可用 dtype=float 显式指定类型)。
- 归一化。
def normalization(data):
    """Min-max scale ``data`` along axis 0 into the range [0, 1].

    Each column is shifted by its minimum and divided by its range
    (max - min). Broadcasting is used instead of ``np.tile``, so 1-D
    input is scaled as a single series as well.

    :param data: array-like exposing ``min(0)`` / ``max(0)`` (e.g. a numpy
        ndarray); rows are samples, columns are features.
    :return: the scaled data, same shape as ``data``. A column whose values
        are all equal (zero range) is mapped to 0 instead of NaN.
    """
    min_v = data.min(0)
    ranges = data.max(0) - min_v
    # A constant column has zero range; divide by 1 there so the result is
    # 0 rather than the NaN produced by 0 / 0.
    safe_ranges = np.where(ranges == 0, 1, ranges)
    return (data - min_v) / safe_ranges
- pandas 的 rolling().apply() 对每个窗口只能返回一个标量,无法一次返回多列数据。可以使用 numpy 的 as_strided(stride)构造滑动窗口视图来实现 rolling 功能。
from numpy.lib.stride_tricks import as_strided as stride
def roll_np(df: pd.DataFrame, apply_func: callable, window: int, return_col_num: int, **kwargs):
    """Apply a multi-column rolling function over a 2-D DataFrame.

    Unlike ``DataFrame.rolling(...).apply``, ``apply_func`` may return
    several values per window. Each window is handed to ``apply_func`` as a
    2-D numpy array built from ``df.reset_index().values``, so the leading
    column(s) hold the former index and the remaining columns hold the data.

    :param df: input frame; it is not modified.
    :param apply_func: callable invoked as ``apply_func(window_values, **kwargs)``;
        must return ``return_col_num`` values.
    :param window: number of consecutive rows per window (must be >= 1).
    :param return_col_num: number of columns in the result.
    :param kwargs: extra keyword arguments forwarded to ``apply_func``.
    :return: float ndarray of shape ``(len(df), return_col_num)``; the first
        ``window - 1`` rows (incomplete windows) are NaN.
    :raises ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    v = df.reset_index().values
    dim0, dim1 = v.shape
    result_values = np.full((dim0, return_col_num), np.nan)

    n_windows = dim0 - (window - 1)
    if n_windows <= 0:
        # Fewer rows than the window size: no complete window exists, so
        # every row stays NaN (as_strided would reject a negative shape).
        return result_values

    stride0, stride1 = v.strides
    # Reusing the row stride for the new leading axis makes window i start at
    # row i. The view shares memory between overlapping windows, so it is
    # read-only to keep apply_func from corrupting neighbouring windows.
    stride_values = stride(
        v,
        (n_windows, window, dim1),
        (stride0, stride0, stride1),
        writeable=False,
    )
    # Window i ends at row i + window - 1, hence the enumerate start offset.
    for idx, values in enumerate(stride_values, window - 1):
        result_values[idx] = apply_func(values, **kwargs)
    return result_values
def own_func_np(narr, **kwargs):
    """Summarise one window of rows into four values.

    Presumably meant as an ``apply_func`` for ``roll_np`` (column 0 would
    then be the former index) -- confirm against the caller.

    :param narr: 2-D array with at least five columns.
    :param kwargs: accepted for interface compatibility; unused.
    :return: 1-D array ``[first value of column 1, last value of column 2,
        maximum of column 3, minimum of column 4]``.
    """
    first_of_col1 = narr[0, 1]
    last_of_col2 = narr[-1, 2]
    max_of_col3 = narr[:, 3].max()
    min_of_col4 = narr[:, 4].min()
    return np.array([first_of_col1, last_of_col2, max_of_col3, min_of_col4])
as_strided(x, shape, strides)(此处导入时别名为 stride):按给定的 shape 和 strides(各维度的步长,单位为字节)在原数组的内存上构造视图,不复制数据。
enumerate(iterable, start):依次返回 (index, item),index 从 start 开始计数。