一、json to txt
解决的问题
1)文件夹目录下同时有json文件和jpg图片,遍历目录时如果把jpg文件也当作json读取就会报错
解决方法:增加json文件后缀判断
for json_name in json_names:
if json_name[-4:]=='json':
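2)pandas的DataFrame.append不会原地修改数据,只调用而不接收返回值时,原DataFrame不会增加行
解决方法:append会返回新的DataFrame,需要把返回值重新赋给原变量(注意:pandas 2.0已移除DataFrame.append,新版本请改用pd.concat)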
label=label.append(new_label,ignore_index=True)
3)部分json标注的坐标存在缺失,矩形框本应有两个点、共四个坐标值
报错:
x2 = float((i['points'][1][0])) / img_w
IndexError: list index out of range
解决方法:使用try-except捕捉错误
4)label表中出现重复的标签:对DataFrame使用in判断的是列名而不是列中的值,所以下面的判断几乎恒为真,同一标签会被重复添加
if i['label'] not in label:
new_label=pd.DataFrame(columns=['label'], data=[i['label']])
label=label.append(new_label,ignore_index=True)
解决方法:
if (label['label'] != i['label']).all():
5)用str()把label列写入txt文件时,得到的是Series的打印形式(带索引和dtype信息,行数较多时中间部分会被省略),而不是完整的标签列表
txt_name = 'label.txt' # 生成txt文件你想存放的路径
txt_file = open(txt_name, 'w')
txt_file.write(str(label['label'])+ '\n')
解决方法:
label.to_csv('label.txt', sep='\t', index=True)
二、源代码
import json
import os
import pandas as pd
def convert(img_size, box):
    """Return the first four entries of ``box`` as an ``(x1, y1, x2, y2)`` tuple.

    ``img_size`` is accepted but not used: the values in ``box`` are passed
    through unchanged. No centre/width/height conversion or rescaling by the
    image size is performed here.

    Raises ``IndexError`` if ``box`` is a sequence with fewer than four items.
    """
    left, top, right, bottom = box[0], box[1], box[2], box[3]
    return (left, top, right, bottom)
def decode_json(json_floder_path, json_name, label,
                txt_folder_path=r'C:\Users\Tycoon\Desktop\Taxi invoice\train/'):
    """Write the rectangle shapes of one annotation JSON file to a txt file.

    The JSON file is expected to provide the keys ``imageWidth``,
    ``imageHeight`` and ``shapes``; every shape is read through its
    ``shape_type``, ``label`` and ``points`` keys. For each shape whose
    ``shape_type`` is ``'rectangle'`` one line is written to
    ``<txt_folder_path>/<json stem>.txt``::

        <label index> <x1> <y1> <x2> <y2>

    where the coordinates are the first two points divided by the image
    width (x) and height (y), and the label index is the row index of the
    label name inside ``label``.

    Args:
        json_floder_path: Folder that contains ``json_name``.
        json_name: File name of the annotation JSON file.
        label: DataFrame with a ``'label'`` column holding the label names
            seen so far. It is not modified in place.
        txt_folder_path: Folder the txt file is written to. Defaults to the
            location that used to be hard-coded in this function.

    Returns:
        A DataFrame holding ``label`` plus any label names that were first
        seen in this file.
    """
    # splitext drops the real extension instead of assuming a 5-char suffix.
    stem = os.path.splitext(json_name)[0]
    txt_name = os.path.join(txt_folder_path, stem + '.txt')
    json_path = os.path.join(json_floder_path, json_name)

    # Parse the JSON before creating the output file so that an unreadable
    # annotation does not leave an empty txt file behind; the context manager
    # guarantees the handle is closed.
    with open(json_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)
    img_w = data['imageWidth']
    img_h = data['imageHeight']

    with open(txt_name, 'w') as txt_file:
        for shape in data['shapes']:
            if shape['shape_type'] != 'rectangle':
                continue
            name = shape['label']
            # Register the label name the first time it is seen. This happens
            # before the coordinates are checked, so a shape with missing
            # points still registers its label.
            if (label['label'] != name).all():
                new_label = pd.DataFrame(columns=['label'], data=[name])
                # DataFrame.append was removed in pandas 2.0; concat returns
                # the same "old rows + new row" frame on every version.
                label = pd.concat([label, new_label], ignore_index=True)
            try:
                points = shape['points']
                x1 = float(points[0][0]) / img_w
                y1 = float(points[0][1]) / img_h
                x2 = float(points[1][0]) / img_w
                y2 = float(points[1][1]) / img_h
                n = label[label['label'] == name].index[0]
                bbox = convert((img_w, img_h), (x1, y1, x2, y2))
                txt_file.write(str(n) + " " + " ".join([str(a) for a in bbox]) + '\n')
            except IndexError:
                # A rectangle with fewer than two points cannot be written;
                # report it and keep processing the remaining shapes.
                print(stem + '的' + name + "标签坐标缺失")
    return label
if __name__ == "__main__":
    # Folder that holds the annotation JSON files.
    json_floder_path = r'C:\Users\Tycoon\Desktop\Taxi invoice\train_dataset\已标注训练集汇总'
    # Label table that is grown file by file; starts with no rows.
    label = pd.DataFrame(columns=['label'])
    for json_name in os.listdir(json_floder_path):
        # Skip every directory entry whose name does not end in 'json'.
        if not json_name.endswith('json'):
            continue
        print(json_name)
        label = decode_json(json_floder_path, json_name, label)
    # Save the collected label names together with their index, tab-separated.
    label.to_csv('label.txt', sep='\t', index=True)