我有一个像这样的输入数据框:
CUSTOMER_ID PRODUCT_ID VENDOR_ID DAT ORDER_ID COLOR_ID
0 10078229 508136536 450 2018-11-23 20183200576771 1000
1 10078229 508136532 450 2018-11-23 20183200576771 1000
2 10202280 506894206 450 2018-11-23 20183231461778 1000
3 10207584 500970872 2097 2018-11-23 20183231430937 1002
4 10207584 500970872 2097 2018-11-23 20183231430937 1000
5 10268028 511131122 450 2018-11-23 20183231418341 1000
6 10268028 509736876 450 2018-11-23 20183231418341 1000
7 10268028 507095754 450 2018-11-23 20183231418341 1000
8 10268028 513902792 450 2018-11-23 20183231418341 1000
9 10383692 508229004 450 2018-11-23 20183190670154 1000
我想要一个像这样的3级json输出作为我的输出:
[
{
"CUSTOMER_ID": 10078229,
"ORDER" : [
{
"ORDER_ID": 20183200576771,
"DAT": "2018-11-23",
"PRODUCT": [
{
"PRODUCT_ID": 508136536,
"COLOR_ID": 1000,
"SIZE_ID" : 1002
},
{
"PRODUCT_ID": 508136532,
"COLOR_ID": 1000,
"SIZE_ID" : 1003
}
],
"VENDOR": [
{
"VENDOR_ID" : 1234
},
{
"VENDOR_ID" : 12345
} ]
},
{
"ORDER_ID" : 2222 ...
} ]
}
, "CUSTOMER_ID" : 12345 ....
]
我能够在标题级别创建一个带有客户的json,在另一个级别创建其他列.但我不能做客户/订单/(产品和供应商)等层次结构.
有人有提示吗?
谢谢,
编辑:
从现在开始我做了什么(我尝试用客户创建1个json):
df_final1=pd.DataFrame(columns=['CUSTOMER_ID','JSON_EVENT'])
for i in data_test['CUSTOMER_ID'].unique():
for j in data_test[['ORDER_ID','DAT']].apply(lambda x: ' '.join(x.astype(str)), axis=1).unique():
a=pd.DataFrame([[i,
data_test[(data_test[['ORDER_ID','DAT']].apply(lambda x: ' '.join(x.astype(str)), axis=1)==j) & (data_test['CUSTOMER_ID']==i) ]
.groupby(['ORDER_ID','DAT'])['PRODUCT_ID',
'VENDOR_ID','SIZE','COLOR'].apply(
lambda x: x.to_dict(orient='records')).reset_index('PRODUITS NON NOTES').to_json(orient='records')]],
columns=['CUSTOMER_ID','JSON_EVENT'])
df_final1=df_final1.append(a,ignore_index=True)
最佳答案 你可以使用itertools.groupby:
from itertools import groupby, count
import json
_count = count(1002)
data = df_final1.reset_index().values.tolist()
#[[10078229, 508136536, 450, '2018-11-23', 20183200576771, 1000], [10078229, 508136532, 450, '2018-11-23', 20183200576771, 1000], [10202280, 506894206, 450, '2018-11-23', 20183231461778, 1000], [10207584, 500970872, 2097, '2018-11-23', 20183231430937, 1002], [10207584, 500970872, 2097, '2018-11-23', 20183231430937, 1000], [10268028, 511131122, 450, '2018-11-23', 20183231418341, 1000], [10268028, 509736876, 450, '2018-11-23', 20183231418341, 1000], [10268028, 507095754, 450, '2018-11-23', 20183231418341, 1000], [10268028, 513902792, 450, '2018-11-23', 20183231418341, 1000], [10383692, 508229004, 450, '2018-11-23', 20183190670154, 1000]]
grouped = [[a, [c for _, *c in b]] for a, b in groupby(data, key=lambda x:x[0])]
new_grouped = [[a, [[c, list(d)] for c, d in groupby(b, key=lambda _k:_k[-2])]] for a, b in grouped]
final_structure = [{'CUSTOMER_ID':a, 'ORDER':[{'ORDER_ID':c, 'DAT':b[0][2],
'PRODUCT':[{'PRODUCT_ID':h, 'COLOR_ID':j, 'SIZE_ID':next(_count)} for h, *_, j in b],
'VENDOR':[h[1] for h in b]} for c, b in j]} for a, j in new_grouped]
print(json.dumps(final_structure, indent=4))
输出:
[
{
"CUSTOMER_ID": 10078229,
"ORDER": [
{
"ORDER_ID": 20183200576771,
"DAT": "2018-11-23",
"PRODUCT": [
{
"PRODUCT_ID": 508136536,
"COLOR_ID": 1000,
"SIZE_ID": 1002
},
{
"PRODUCT_ID": 508136532,
"COLOR_ID": 1000,
"SIZE_ID": 1003
}
],
"VENDOR": [
450,
450
]
}
]
},
{
"CUSTOMER_ID": 10202280,
"ORDER": [
{
"ORDER_ID": 20183231461778,
"DAT": "2018-11-23",
"PRODUCT": [
{
"PRODUCT_ID": 506894206,
"COLOR_ID": 1000,
"SIZE_ID": 1004
}
],
"VENDOR": [
450
]
}
]
},
{
"CUSTOMER_ID": 10207584,
"ORDER": [
{
"ORDER_ID": 20183231430937,
"DAT": "2018-11-23",
"PRODUCT": [
{
"PRODUCT_ID": 500970872,
"COLOR_ID": 1002,
"SIZE_ID": 1005
},
{
"PRODUCT_ID": 500970872,
"COLOR_ID": 1000,
"SIZE_ID": 1006
}
],
"VENDOR": [
2097,
2097
]
}
]
},
{
"CUSTOMER_ID": 10268028,
"ORDER": [
{
"ORDER_ID": 20183231418341,
"DAT": "2018-11-23",
"PRODUCT": [
{
"PRODUCT_ID": 511131122,
"COLOR_ID": 1000,
"SIZE_ID": 1006
},
{
"PRODUCT_ID": 509736876,
"COLOR_ID": 1000,
"SIZE_ID": 1007
},
{
"PRODUCT_ID": 507095754,
"COLOR_ID": 1000,
"SIZE_ID": 1008
},
{
"PRODUCT_ID": 513902792,
"COLOR_ID": 1000,
"SIZE_ID": 1009
}
],
"VENDOR": [
450,
450,
450,
450
]
}
]
},
{
"CUSTOMER_ID": 10383692,
"ORDER": [
{
"ORDER_ID": 20183190670154,
"DAT": "2018-11-23",
"PRODUCT": [
{
"PRODUCT_ID": 508229004,
"COLOR_ID": 1000,
"SIZE_ID": 1010
}
],
"VENDOR": [
450
]
}
]
}
]