栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 面试经验 > 面试问答

Android-使用JSOUP解析JS生成的网址

面试问答 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

Android-使用JSOUP解析JS生成的网址

(请参阅下面的“更新”部分;第一个/已被采纳的解决方案不符合Android的要求,但保留下来以供参考。)


桌面解决方案

HtmlUnit似乎无法处理此站点(最近经常发生这种情况)。所以我也没有一个简单的Java解决方案,但是您可以使用PhantomJS:为您的操作系统下载二进制文件,创建一个脚本文件,从Java代码中启动进程,并使用dom解析器(如jsoup)解析输出。

脚本文件(这里称为simple.js):

// PhantomJS script: opens the URL given on the command line and writes the
// resulting page content to "<fileName>.html".
//
// Usage: phantomjs simple.js <url> [fileName]
//
// NOTE: `var` and string concatenation are kept on purpose. This file is run
// by PhantomJS rather than a modern JS engine — presumably without reliable
// ES2015 support; confirm before modernizing the syntax.
var page = require('webpage').create();
var fs = require('fs');
var system = require('system');

var url = "";
var fileName = "output";

// first parameter: url
// second parameter: filename for output
console.log("args length: " + system.args.length);
if (system.args.length > 1) {
    url = system.args[1];
}
if (system.args.length > 2) {
    fileName = system.args[2];
}

// Nothing to load without a URL: leave immediately.
if (url === "") {
    phantom.exit();
}

page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36';
// Only the markup is needed, so image loading is switched off.
page.settings.loadImages = false;

page.open(url, function(status) {
    console.log("Status: " + status);
    if (status === "success") {
        var path = fileName + '.html';
        fs.write(path, page.content, 'w');
    }
    // Always exit, even when loading failed.
    phantom.exit();
});

Java代码(获取标题和Cover-URL的示例):

try {    //change path to phantomjs binary and your script file    String outputFileName = "srulad";    String phantomJSPath = "phantomjs" + File.separator + "bin" + File.separator + "phantomjs";    String scriptFile = "simple.js";    String urlParameter = "http://srulad.com/#page-2";    new File(outputFileName+".html").delete();    Process process = Runtime.getRuntime().exec(phantomJSPath + " " + scriptFile + " " + urlParameter + " " + outputFileName);    process.waitFor();    document doc = Jsoup.parse(new File(outputFileName + ".html"),"UTF-8"); // output.html is created by phantom.js, same path as page.js    Elements elements = doc.select("#list_page-2 > div");    for (Element element : elements) {        System.out.println(element.select("div.l-description.float-left > div:nth-child(1) > a").first().attr("title"));        System.out.println(element.select("div.l-image.float-left > a > img.lazy").first().attr("data-original"));    }} catch (IOException | InterruptedException e) {    e.printStackTrace();}

输出:

სიყვარული და მოწყალება / Love & Mercy
http://srulad.com/assets/uploads/42410_Love_and_Mercy.jpg
მუზა / The Muse
http://srulad.com/assets/uploads/43164_large_qRzsimNz0eDyFLFJcbVLIxlqii.jpg
...

更新

使用WebView和jsoup可以在Android中解析包含基于JavaScript的动态内容的网站。以下示例应用使用启用了JavaScript的WebView来渲染依赖JavaScript的网站;通过JavascriptInterface取回HTML源码,再用jsoup进行解析。作为概念验证,标题和封面图片的网址被用来填充ListView。点击按钮减少或增加页码会触发ListView的更新。
注意:已在Android 5.1.1 / API 22设备上测试。

向您的AndroidManifest.xml添加互联网权限

<uses-permission android:name="android.permission.INTERNET" />

activity_main.xml

<?xml version="1.0" encoding="utf-8"?>
<!-- Vertical screen layout: a row with the two paging buttons, then the list. -->
<LinearLayout
    xmlns:android="http://schemas.android.com/apk/res/android"
    android:orientation="vertical"
    android:layout_width="match_parent"
    android:layout_height="match_parent">

    <!-- Paging buttons, side by side with equal weight. -->
    <LinearLayout
        android:orientation="horizontal"
        android:layout_width="match_parent"
        android:layout_height="wrap_content">

        <Button
            android:id="@+id/buttonDown"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_weight="0.5"
            android:text="@string/page_down" />

        <Button
            android:id="@+id/buttonUp"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_weight="0.5"
            android:text="@string/page_up" />
    </LinearLayout>

    <!-- List that MainActivity fills with the parsed entries. -->
    <ListView
        android:id="@+id/listView"
        android:layout_width="match_parent"
        android:layout_height="0dp"
        android:layout_gravity="bottom"
        android:layout_weight="0.5" />
</LinearLayout>

MainActivity.java

public class MainActivity extends AppCompatActivity {    private final Handler uiHandler = new Handler();    private ArrayAdapter<String> adapter;    private ArrayList<String> entries = new ArrayList<>();    private ProgressDialog progressDialog;    private class JSHtmlInterface {        @android.webkit.JavascriptInterface        public void showHTML(String html) { final String htmlContent = html; uiHandler.post(     new Runnable() {         @Override         public void run() {  document doc = Jsoup.parse(htmlContent);  Elements elements = doc.select("#online_movies > div > div");  entries.clear();  for (Element element : elements) {      String title = element.select("div.l-description.float-left > div:nth-child(1) > a").first().attr("title");      String imgUrl = element.select("div.l-image.float-left > a > img.lazy").first().attr("data-original");      entries.add(title + "n" + imgUrl);  }  adapter.notifyDataSetChanged();         }     } );        }    }    @Override    protected void onCreate(Bundle savedInstanceState) {        super.onCreate(savedInstanceState);        setContentView(R.layout.activity_main);        ListView listView = (ListView) findViewById(R.id.listView);        adapter = new ArrayAdapter<>(this, android.R.layout.simple_list_item_1, android.R.id.text1, entries);        listView.setAdapter(adapter);        progressDialog = ProgressDialog.show(this, "Loading","Please wait...", true);        progressDialog.setCancelable(false);        try { final WebView browser = new WebView(this); browser.setVisibility(View.INVISIBLE); browser.setLayerType(View.LAYER_TYPE_NONE,null); browser.getSettings().setJavascriptEnabled(true); browser.getSettings().setBlockNetworkImage(true); browser.getSettings().setDomStorageEnabled(false); browser.getSettings().setCacheMode(WebSettings.LOAD_NO_CACHE); browser.getSettings().setLoadsImagesAutomatically(false); browser.getSettings().setGeolocationEnabled(false); browser.getSettings().setSupportZoom(false); 
browser.addJavascriptInterface(new JSHtmlInterface(), "JSBridge"); browser.setWebViewClient(     new WebViewClient() {         @Override         public void onPageStarted(WebView view, String url, Bitmap favicon) {  progressDialog.show();  super.onPageStarted(view, url, favicon);         }         @Override         public void onPageFinished(WebView view, String url) {  browser.loadUrl("javascript:window.JSBridge.showHTML('<html>'+document.getElementsByTagName('html')[0].innerHTML+'</html>');");  progressDialog.dismiss();         }     } ); findViewById(R.id.buttonDown).setonClickListener(new View.onClickListener() {     @Override     public void onClick(View view) {         uiHandler.post(new Runnable() {  @Override  public void run() {      int page = Integer.parseInt(browser.getUrl().split("-")[1]);      int newPage = page > 1 ? page-1 : 1;      browser.loadUrl("http://srulad.com/#page-" + newPage);      browser.loadUrl(browser.getUrl()); // not sure why this is needed, but doesn't update without it on my device      if(getSupportActionBar()!=null) getSupportActionBar().setTitle(browser.getUrl());  }         });     } }); findViewById(R.id.buttonUp).setonClickListener(new View.onClickListener() {     @Override     public void onClick(View view) {         uiHandler.post(new Runnable() {  @Override  public void run() {      int page = Integer.parseInt(browser.getUrl().split("-")[1]);      int newPage = page+1;      browser.loadUrl("http://srulad.com/#page-" + newPage);      browser.loadUrl(browser.getUrl()); // not sure why this is needed, but doesn't update without it on my device      if(getSupportActionBar()!=null) getSupportActionBar().setTitle(browser.getUrl());  }         });     } }); browser.loadUrl("http://srulad.com/#page-1"); if(getSupportActionBar()!=null) getSupportActionBar().setTitle(browser.getUrl());        } catch (Exception e) { e.printStackTrace();        }    }}


转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/464830.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号