>>> import pymongo
启动mongo服务:
$ mongod
与mongoclient连接:
>>> from pymongo import MongoClient
>>> client = MongoClient()
>>> client = MongoClient('localhost', 27017)
>>> client = MongoClient('mongodb://localhost:27017/')
取得数据库:
>>> db = client.test_database
>>> db = client['test-database']
取得表:
>>> collection = db.test_collection
>>> collection = db['test-collection']
插入:
>>> uid = {'id':'5189090045'}
>>> collection.insert_one(uid)
查找:
>>> uid2 = {'id':'123456789'}
>>> collection.insert_one(uid2)
# 表中现在有两条数据
>>> collection.find_one() # 取得第一条数据
{'_id': ObjectId('58b500e37dffdd2be832cbf7'), 'id': '5189090045'}
>>> collection.find_one({'id':'123456789'}) #取得指定数据
{'_id': ObjectId('58b501c17dffdd2be832cbf8'), 'id': '123456789'}
删除:
>>> collection.delete_one({'id':'123456789'})
eg:
import requests
from bs4 import BeautifulSoup
import pymongo
from pymongo import MongoClient
from time import time
# Example script: fetch the Douban Top 250 listing pages, pull a few fields
# out of every movie entry, store each entry as one MongoDB document, and
# report the elapsed wall-clock time.
start = time()

client = MongoClient('localhost', 27017)
db = client.DoubanMovie
collection = db.Single  # kept from the original notes; not used below
posts = db.posts

# The listing is requested with start offsets 0, 25, ..., 225 (ten requests).
for page in range(0, 250, 25):
    url = 'https://movie.douban.com/top250?start=' + str(page) + '&filter='
    # A timeout keeps a stalled connection from hanging the script forever,
    # and raise_for_status() surfaces HTTP errors instead of silently
    # parsing an error page that contains no movie entries.
    r = requests.get(url, timeout=10)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "lxml")

    for info in soup.find_all(class_='info'):
        name = info.find('a').text.strip('\n')
        introduction = info.find('p').text.strip()
        score = info.find(class_='rating_num').text

        # Entries without an 'inq' element get an empty string.
        inq_tag = info.find(class_='inq')
        inq = inq_tag.text if inq_tag is not None else ''

        post = {
            'name': name,
            'introduction': introduction,
            'score': score,
            'inq': inq,
        }
        posts.insert_one(post)

end = time()
print('Cost {} seconds'.format(end - start))
# Sample output from one run: Cost 4.598546266555786 seconds