基本操作 update
// 查找有限个数的数据
# Print the 'area' field of at most 300 documents.
# (The original `for` header was missing its trailing colon.)
for doc in item_info.find().limit(300):
    print(doc['area'])
// 去掉数据源中带标点符号的数据
# Preview the cleaned 'area' value of every document; nothing is written back.
# `punctuation` must already be in scope (presumably string.punctuation -- confirm).
for doc in item_info.find():
    # .get() so a document without an 'area' field falls through to the
    # placeholder instead of raising KeyError.
    raw_area = doc.get('area')
    if raw_area:
        # A distinct name for the comprehension variable keeps it from
        # shadowing the loop document.
        area = [part for part in raw_area if part not in punctuation]
    else:
        area = ['不明']
    print(area)
// update 更新数据库中的数据
db.collection.update()
e.g. {id:1, name:0, info:3}
update({id:1}, {$set: {name:2}}) // 修改id为1的数据,将name改为2
# Rewrite the 'area' field of every document with punctuation entries removed.
# `punctuation` must already be in scope (presumably string.punctuation -- confirm).
for doc in item_info.find():
    # .get() so a document without an 'area' field receives the placeholder
    # instead of raising KeyError.
    raw_area = doc.get('area')
    if raw_area:
        # A distinct comprehension variable: the document is still needed
        # below for its '_id'.
        area = [part for part in raw_area if part not in punctuation]
    else:
        area = ['不明']
    # Collection.update() is deprecated and removed in PyMongo 4;
    # update_one() is the single-document replacement.
    item_info.update_one({'_id': doc['_id']}, {'$set': {'area': area}})
// 获取不重复的area_list
from collections import Counter

# First element of every document's 'area' field (duplicates kept).
area_list = [doc['area'][0] for doc in item_info.find()]

# One pass over area_list instead of calling list.count() per distinct value.
area_counts = Counter(area_list)

# Distinct area names, and how often each occurs, in matching order.
area_index = list(area_counts)
post_times = [area_counts[name] for name in area_index]
// 生成charts_data函数
def data_gen(type, areas=None, counts=None):
    """Yield one chart-series dict per (area, count) pair.

    Args:
        type: Value stored under the 'type' key of every yielded dict
            (the notes call this with 'column'). The name shadows the
            builtin but is kept so existing callers are unaffected.
        areas: Iterable of area names. Defaults to the module-level
            ``area_index``.
        counts: Iterable of values paired positionally with ``areas``.
            Defaults to the module-level ``post_times``.

    Yields:
        dict: ``{'name': area, 'data': [count], 'type': type}``.
    """
    if areas is None:
        areas = area_index
    if counts is None:
        counts = post_times
    # The previous `length` counter only fed a guard (0 <= len(area_index))
    # that was always true, so it is dropped; zip() already bounds the loop.
    for area, times in zip(areas, counts):
        yield {
            'name': area,
            'data': [times],
            'type': type,
        }
# Materialise every generated series dict, then hand them to charts.plot.
series = list(data_gen('column'))
charts.plot(
    series,
    show='inline',
    options={'title': {'text': 'Charts are Awesome!!!'}},
)
基本操作 find
db.collection.find()
e.g.:
{id:1, name:0, info:3, cate:4}
// 查找id为1的数据,且只查看name和info字段,其余不看
find({id:1},{name:1, info:1})
result:
{id:1, name:0, info:3}
// 查看area,不看_id
# Empty filter; the projection includes 'area' and excludes '_id'.
# Prints at most 300 results.
for doc in item_info.find({}, {'area': 1, '_id': 0}).limit(300):
    print(doc)
// $slice $in 用法
# $in needs an array: a Python list, not a set literal, which cannot be
# BSON-encoded. The projection keeps only the first element of 'area'
# via $slice and excludes '_id', 'price' and 'title'.
for doc in item_info.find(
    {'pub_date': {'$in': ['2016.01.12', '2016.01.14']}},
    {'area': {'$slice': 1}, '_id': 0, 'price': 0, 'title': 0},
).limit(300):
    print(doc)
基本操作 aggregate
db.collection.aggregate(pipeline)
pipeline = [
{$match: ?},
{$group: ?},
{$sort: ?},
{$limit: ?},
{$skip: ?},
{$unwind: ?},
{$redact: ?},
{$sample: ?},
{$out: ?}
]
# Aggregation stages: filter, group by price, sort by group size, keep three.
pipeline = [
    # Both conditions must hold (the opening brace of the first condition
    # was missing in the original).
    {'$match': {'$and': [{'pub_date': '2015.12.24'}, {'time': 3}]}},
    # One group per distinct price; 'counts' is the number of documents in it.
    {'$group': {'_id': '$price', 'counts': {'$sum': 1}}},
    # Sort on the field the $group stage actually emits.
    # 1 sorts ascending; -1 sorts descending (largest first).
    {'$sort': {'counts': 1}},
    {'$limit': 3},
]
# Run the pipeline and print each result document.
for result in item_info.aggregate(pipeline):
    print(result)
# Same filter as the price pipeline, grouped by one element of 'cates'.
pipeline2 = [
    # Both conditions must hold (the opening brace of the first condition
    # was missing in the original).
    {'$match': {'$and': [{'pub_date': '2015.12.24'}, {'time': 3}]}},
    # '$cates' needs the '$' prefix to be read as a field path; without it
    # the string is taken literally. $slice takes 1 element starting at
    # index 2.
    {'$group': {'_id': {'$slice': ['$cates', 2, 1]}, 'counts': {'$sum': 1}}},
    # Sort on the field the $group stage actually emits (ascending).
    {'$sort': {'counts': 1}},
    {'$limit': 3},
]
# Date-range + area filter, grouped by one element of 'cates', first three groups.
pipeline3 = [
    {
        '$match': {
            '$and': [
                # Inclusive range on 'pub_date' (bounds are given as strings).
                {'pub_date': {'$gte': '2015.12.25', '$lte': '2015.12.27'}},
                # 'area' must contain every listed value.
                {'area': {'$all': ['朝阳']}},
            ],
        },
    },
    {
        '$group': {
            # $slice takes 1 element of 'cates' starting at index 2.
            '_id': {'$slice': ['$cates', 2, 1]},
            'counts': {'$sum': 1},
        },
    },
    {'$limit': 3},
]
# Average price per 'look' value for one category within a date range.
pipeline4 = [
    {
        '$match': {
            '$and': [
                # Inclusive range on 'pub_date' (bounds are given as strings).
                {'pub_date': {'$gte': '2015.12.25', '$lte': '2015.12.27'}},
                # 'cates' must contain every listed value.
                {'cates': {'$all': ['北京二手手机']}},
                # Exclude documents whose 'look' is the '-' placeholder.
                {'look': {'$nin': ['-']}},
            ],
        },
    },
    {'$group': {'_id': '$look', 'avg_price': {'$avg': '$price'}}},
    # -1 sorts descending: highest average price first.
    {'$sort': {'avg_price': -1}},
]
终端用法
// 启动mongod服务
mongod
mongo
// 数据库操作
show dbs
use ceshi
show collections
db.createCollection('item_info_copy')
{"ok":1}
db.item_info.copyTo('item_info_copy')
WARNING: db.eval is deprecated.
导入导出
如何导出 csv ?
mongoexport -d database -c collection --type=csv -f field1,field2 -o output/path.csv
mongoexport -d ceshi -c item_info -o /Users/Hou/Desktop/ddd.json