可以使用一个 NumPy 布尔数组作为索引数组来过滤原数组:索引数组中为 True 的位置,原数组对应的元素被保留;为 False 的位置,对应的元素被去掉
import numpy as np
使用索引数组
# Filter an array with an explicit boolean index array: elements at positions
# where the mask is True are kept, elements where it is False are dropped.
a = np.array([1, 2, 3, 4])
b = np.array([True, True, False, False])
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(a[b])  # [1 2]
# The mask does not need a name; it can be built inline.
print(a[np.array([True, False, True, False])])  # [1 3]
通过对原数组进行向量化运算得到索引数组
# Build the boolean mask with a vectorized comparison on the array itself.
a = np.array([1, 2, 3, 2, 1])
b = (a >= 2)  # element-wise comparison -> boolean array of the same length
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(a[b])  # [2 3 2]
# Same result with the comparison written inline as the index.
print(a[a >= 2])  # [2 3 2]
通过对另一个数组进行向量化运算得到索引数组(索引数组不必来自被过滤的数组本身)
# The mask may come from a different array than the one being filtered:
# here `b` decides which elements of `a` are kept.
a = np.array([1, 2, 3, 4, 5])
b = np.array([1, 2, 3, 2, 1])
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(b == 2)  # [False  True False  True False]
print(a[b == 2])  # [2 4]
一个例子:
# Time each of the 20 students spent on the course.
time_spent = np.array([
    12.89697233, 0.0, 64.55043217, 0.0, 24.2315615,
    39.991625, 0.0, 0.0, 147.20683783, 0.0,
    0.0, 0.0, 45.18261617, 157.60454283, 133.2434615,
    52.85000767, 0.0, 54.9204785, 26.78142417, 0.0,
])

# Number of days each of the 20 students took part in the course.
days_to_cancel = np.array([
    4, 5, 37, 3, 12, 4, 35, 38, 5, 37,
    3, 3, 68, 38, 98, 2, 249, 2, 127, 35,
])
def mean_time_for_paid_students(time_spent, days_to_cancel, min_days=7):
    """Return the mean course time of students who took part long enough.

    A student is included when the corresponding entry of
    ``days_to_cancel`` is greater than or equal to ``min_days``.

    Args:
        time_spent: Array-like of time spent on the course, one entry per
            student.
        days_to_cancel: Array-like of the number of days each student took
            part in the course; must line up element-for-element with
            ``time_spent``.
        min_days: Inclusive threshold on ``days_to_cancel``. Defaults to 7,
            the value that was previously hard-coded.

    Returns:
        The mean of the selected ``time_spent`` entries. When no student
        meets the threshold the selection is empty and NumPy's ``mean``
        yields ``nan`` (emitting a RuntimeWarning).
    """
    # np.asarray leaves ndarrays untouched and lets plain lists/tuples be
    # used for both the comparison and the boolean-mask indexing below.
    time_spent = np.asarray(time_spent)
    days_to_cancel = np.asarray(days_to_cancel)

    # Boolean mask: True for every student at or above the threshold.
    paid_mask = days_to_cancel >= min_days
    return time_spent[paid_mask].mean()
# Mean course time of the students who took part for at least 7 days.
# Result: 41.0540034855 (the number of digits printed depends on the
# Python/NumPy version).
average_time = mean_time_for_paid_students(time_spent, days_to_cancel)
print(average_time)