最近想做一个自己的应用市场，但苦于没有应用数据，因此用 jsoup 来抓取腾讯应用市场（应用宝）上软件 APP 的各种数据，
效果挺不错的，哈哈，想要什么数据都能抓到。
直接看代码。
我把代码封装了一下。
大家直接传入腾讯应用宝上具体软件的页面地址，即可进行解析。
package com.ferris.event.app;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.helper.StringUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.ferris.entity.AppDetail;
import com.ferris.entity.AppDownloadInfo;
import com.ferris.utils.StringUtils;
/**
 * Scrapes app metadata from Tencent MyApp pages using jsoup.
 *
 * <p>All methods are static. Each fetch runs while holding one class-wide lock, so at most one
 * page is downloaded and parsed at a time.
 */
public class AppGetService {

    /** Prefix prepended to the {@code href} of the app link found in each listing entry. */
    public static final String urlhead = "http://android.myapp.com/myapp/";

    /** Single lock shared by both fetch methods; it serializes every download. */
    private static final Object lock = new Object();

    /** Public no-arg constructor, retained so any code that instantiates this class still compiles. */
    public AppGetService() {
    }

    /**
     * Downloads a listing page and builds one {@link AppDownloadInfo} for every element whose
     * {@code class} attribute is exactly {@code app-info-desc}.
     *
     * @param app URL of the listing page to fetch
     * @return the extracted entries (possibly empty), or {@code null} if fetching the page
     *         failed with an {@link IOException}
     */
    public static List<AppDownloadInfo> getNetApp(String app) {
        synchronized (lock) {
            final Document doc;
            try {
                doc = Jsoup.connect(app).get();
            } catch (IOException e) {
                e.printStackTrace();
                // A failed fetch is reported as null (not an empty list); kept for compatibility.
                return null;
            }

            List<AppDownloadInfo> infos = new ArrayList<AppDownloadInfo>();
            for (Element element : doc.getElementsByAttributeValue("class", "app-info-desc")) {
                // Evaluate the selector once; attr() reads from the first matched <a> that
                // carries the requested attribute.
                Elements links = element.select("a");

                AppDownloadInfo info = new AppDownloadInfo();
                info.setAppname(links.attr("appname"));
                info.setAppsize(element.getElementsByClass("size").text());
                info.setTypename(element.getElementsByClass("download").text());
                // NOTE(review): the "md5" field receives urlhead + href, i.e. a URL — confirm
                // this is intended.
                info.setAppmd5(urlhead + links.attr("href"));
                info.setIconurl(links.attr("appicon"));
                info.setPackagename(links.attr("apk"));

                // Example value:
                // http://dd.myapp.com/16891/548EBCD7DD5F97E652615EECBD352905.apk?fsname=com%2Etencent%2Emobileqq%5F5%2E2%2E1%5F182.apk&asr=8eff
                String downloadurl = links.attr("ex_url");
                if (!StringUtils.isEmpty(downloadurl)) {
                    info.setDownloadUrl(stripQuery(downloadurl));
                }
                infos.add(info);
            }
            return infos;
        }
    }

    /**
     * Downloads an app detail page and extracts preview image URLs, version, developer and
     * description.
     *
     * @param appdetail URL of the detail page to fetch
     * @return the populated detail object; if fetching the page failed with an
     *         {@link IOException}, an {@link AppDetail} with no fields set is returned
     */
    public static AppDetail getNetAppDetail(String appdetail) {
        synchronized (lock) {
            AppDetail appDetail = new AppDetail();
            final Document doc;
            try {
                doc = Jsoup.connect(appdetail).get();
            } catch (IOException e) {
                e.printStackTrace();
                return appDetail;
            }

            // Preview images: the list is only set when at least one picture box exists.
            Elements pictureBoxes = doc.getElementsByAttributeValue("class", "pic-img-box");
            if (!pictureBoxes.isEmpty()) {
                List<String> previewUrls = new ArrayList<String>();
                for (Element box : pictureBoxes) {
                    previewUrls.add(box.select("img").attr("data-src"));
                }
                appDetail.setPreviewurl(previewUrls);
            }

            // Version and developer: the first element with child nodes is the version, and
            // each later one overwrites the developer (so the last one wins). The branches are
            // mutually exclusive so the version text is never also stored as the developer.
            boolean versionSeen = false;
            for (Element element : doc.getElementsByAttributeValue("class", "det-othinfo-data")) {
                if (element.childNodeSize() == 0) {
                    continue;
                }
                if (!versionSeen) {
                    appDetail.setAppversion(element.text());
                    versionSeen = true;
                } else {
                    appDetail.setKaifashang(element.text());
                }
            }

            // Description: the last matching element that has child nodes wins.
            for (Element element : doc.getElementsByAttributeValue("class", "det-app-data-info")) {
                if (element.childNodeSize() > 0) {
                    appDetail.setDescription(element.text());
                }
            }
            return appDetail;
        }
    }

    /**
     * Returns {@code url} without its query string.
     *
     * @param url a non-null URL string
     * @return everything before the first {@code '?'}, or {@code url} unchanged when it
     *         contains no {@code '?'}
     */
    private static String stripQuery(String url) {
        int queryStart = url.indexOf('?');
        // indexOf returns -1 when there is no query part; substring(0, -1) would throw.
        return queryStart >= 0 ? url.substring(0, queryStart) : url;
    }
}