这并不容易做到,因为 resource.received 事件处理程序只提供 url、headers 或 status 之类的元数据,而不提供实际的响应数据。底层的 PhantomJS 事件处理程序也是同样的行为。
无状态AJAX请求
如果 AJAX 调用是无状态的,则可以重新发送一次该请求:
// Re-issue a stateless AJAX request from inside the page once the original
// response has been received. The "resource.received" event only carries
// metadata (url, headers, status), not the response body, so the body has to
// be fetched again.
casper.on("resource.received", function (resource) {
    // Somehow identify the request of interest; here: its URL contains ".json".
    // Only the "end" stage is handled, otherwise this would be executed
    // two times for the same resource.
    if (resource.url.indexOf(".json") !== -1 && resource.stage === "end") {
        var data = casper.evaluate(function (url) {
            // synchronous GET request
            return __utils__.sendAJAX(url, "GET");
        }, resource.url);
        // do something with data, you might need to JSON.parse(data)
    }
});

casper.start(url); // your script
// Article text (continues on the next line): You may want to attach the event listener to
resource.requested。这样,您就无需等待该调用完成。
您也可以像下面这样直接在控制流中执行此操作(来源:“CasperJS waitForResource:如何获取我所等待的资源”的回答):
// Same idea as the event-based variant, but inside the control flow: wait for
// a resource whose URL contains ".json", then repeat the request from the page
// context to obtain its body.
casper.start(url);

var res, resData;
casper.waitForResource(function check(resource) {
    var isMatch = resource.url.indexOf(".json") !== -1;
    if (isMatch) {
        // Remember only the matching resource, so that `res` can never point
        // at an unrelated resource that merely happened to be checked.
        res = resource;
    }
    return isMatch;
}, function then() {
    resData = casper.evaluate(function (url) {
        // synchronous GET request
        return __utils__.sendAJAX(url, "GET");
    }, res.url);
    // do something with the data here or in a later step
});

casper.run();
// Article heading: Stateful AJAX requests
如果 AJAX 调用不是无状态的,则需要替换 XMLHttpRequest 的实现。您需要注入自己的 onreadystatechange 处理程序实现,把信息收集到页面的 window 对象中,然后在另一次 evaluate 调用中把它取回来。
您可以参考 sinon.js 中的 XHR 伪造器(fake XHR),或者使用下面这个完整的 XMLHttpRequest 代理(我是仿照“如何创建 XMLHttpRequest 包装器/代理?”中的方法 3 编写的):
/**
 * Replaces window.XMLHttpRequest with a proxy object that forwards every
 * method and property to a real XMLHttpRequest instance (kept in `this.xhr`).
 * When a handler is assigned to `onreadystatechange`, the proxy wraps it: once
 * the request reaches readyState 4 and the URL passed to open() contains
 * `urlFilter`, the response text is stored in window.myAwesomeResponse.
 *
 * The function is written to be self-contained (ES5, no outer references other
 * than `window` and `console`) because it is handed to evaluate() and executed
 * in the page context.
 *
 * @param {string} [urlFilter="whatever"] Substring the request URL must contain
 *     for its response to be captured. Any non-string value falls back to the
 *     default; an empty string matches every URL.
 */
function replaceXHR(urlFilter){
    (function(window, debug, filter){
        // Formats an arguments object for the debug log.
        function args(a){
            var s = "";
            for (var i = 0; i < a.length; i++) {
                s += "\t\n[" + i + "] => " + a[i];
            }
            return s;
        }

        // Reading responseText may throw when the page requested a non-text
        // responseType. Capturing is best-effort, so treat that as "no text"
        // instead of breaking the page's own handler.
        function readResponseText(xhr){
            try {
                return xhr.responseText;
            } catch (e) {
                return undefined;
            }
        }

        var _XMLHttpRequest = window.XMLHttpRequest;

        window.XMLHttpRequest = function() {
            this.xhr = new _XMLHttpRequest();
        };

        // proxy ALL methods/properties
        var methods = [
            "open",
            "abort",
            "setRequestHeader",
            "send",
            "addEventListener",
            "removeEventListener",
            "getResponseHeader",
            "getAllResponseHeaders",
            "dispatchEvent",
            "overrideMimeType"
        ];
        methods.forEach(function(method){
            window.XMLHttpRequest.prototype[method] = function() {
                if (debug) console.log("ARGUMENTS", method, args(arguments));
                if (method === "open") {
                    // Remember the URL so the response can be filtered later.
                    this._url = arguments[1];
                }
                return this.xhr[method].apply(this.xhr, arguments);
            };
        });

        // proxy change event handler
        Object.defineProperty(window.XMLHttpRequest.prototype, "onreadystatechange", {
            get: function(){
                // this will probably never be called
                return this.xhr.onreadystatechange;
            },
            set: function(onreadystatechange){
                var that = this.xhr;
                var realThis = this;
                that.onreadystatechange = function(){
                    // request is fully loaded
                    if (that.readyState === 4) {
                        var text = readResponseText(that);
                        if (debug) console.log("RESPONSE RECEIVED:", typeof text === "string" ? text.length : "none");
                        // there is a response and filter execution based on url
                        if (text && String(realThis._url).indexOf(filter) !== -1) {
                            window.myAwesomeResponse = text;
                        }
                    }
                    // The page may assign null to remove its handler; only
                    // forward to real functions, and pass the event through.
                    if (typeof onreadystatechange === "function") {
                        onreadystatechange.apply(that, arguments);
                    }
                };
            }
        });

        var otherscalars = [
            "onabort",
            "onerror",
            "onload",
            "onloadstart",
            "onloadend",
            "onprogress",
            "ontimeout",
            "readyState",
            "response",
            "responseText",
            "responseType",
            "responseURL",
            "responseXML",
            "status",
            "statusText",
            "timeout",
            "upload",
            "withCredentials",
            "DONE",
            "UNSENT",
            "HEADERS_RECEIVED",
            "LOADING",
            "OPENED"
        ];
        otherscalars.forEach(function(scalar){
            Object.defineProperty(window.XMLHttpRequest.prototype, scalar, {
                get: function(){
                    return this.xhr[scalar];
                },
                set: function(obj){
                    this.xhr[scalar] = obj;
                }
            });
        });
    })(window, false, typeof urlFilter === "string" ? urlFilter : "whatever");
}
// Article text: If you want to capture the AJAX calls from the very beginning,
// you need to add this to one of the first event handlers
// Install the XMLHttpRequest proxy from the "page.initialized" event handler,
// i.e. from one of the first handlers to run, so that AJAX calls made early in
// the page's life are proxied as well.
casper.on("page.initialized", function () {
    this.evaluate(replaceXHR);
});
// Article text (continues on the next line): or
在需要时再调用 evaluate(replaceXHR)。
控制流如下所示:
// replaceXHR is the full XMLHttpRequest proxy function defined earlier in this
// file. It is deliberately NOT redeclared here: a second, empty
// `function replaceXHR(){ }` declaration would replace the real implementation.

casper.start(yourUrl, function () {
    this.evaluate(replaceXHR);
});

/**
 * Reads window.myAwesomeResponse from the page context.
 * Uses the `casper` object directly instead of `this`, so the function works
 * both as a waitFor() test function and when called as a plain function.
 *
 * @returns {*} whatever the page stored in window.myAwesomeResponse
 */
function getAwesomeResponse() {
    return casper.evaluate(function () {
        return window.myAwesomeResponse;
    });
}

// stops waiting if window.myAwesomeResponse is something that evaluates to true
casper.waitFor(getAwesomeResponse, function then() {
    var data = JSON.parse(getAwesomeResponse());
    // Do something with data
});

casper.run();
// Article text (continues on the next line): As described above, I create a
// proxy for XMLHttpRequest so that every time it is used on the page, I can do
// something with it. The page that you scrape uses the
xhr.onreadystatechange 回调来接收数据。代理是通过定义一个专门的 setter 函数实现的,该 setter 函数会把接收到的数据写入页面上下文中的 window.myAwesomeResponse。您唯一需要做的就是把这段文本取回来。
JSONP请求
如果您知道前缀(即以加载到的 JSON 为参数被调用的函数,例如 insert({"data":["Some", "JSON","here"],"id":"asdasda"})),那么为 JSONP 编写代理就更加容易了。您可以在页面上下文中覆盖 insert 函数:
在页面加载之后
// JSONP variant 1: after the page has loaded, wrap the page's JSONP callback
// (`insert`) so that its argument is also stored in window.myAwesomeResponse.
casper.start(url).then(function () {
    this.evaluate(function () {
        var oldInsert = insert;
        insert = function (json) {
            window.myAwesomeResponse = json;
            oldInsert.apply(window, arguments);
        };
    });
}).waitFor(getAwesomeResponse, function then() {
    // getAwesomeResponse uses `this.evaluate`, so it must be invoked with the
    // casper instance as `this` rather than as a bare function call.
    var raw = getAwesomeResponse.call(this);
    // A JSONP callback receives an already-parsed object; only parse when the
    // stored value is still a JSON string.
    var data = typeof raw === "string" ? JSON.parse(raw) : raw;
    // Do something with data
}).run();
// Article text: or before the request is received (if the function is
// registered just before the request is invoked)
// JSONP variant 2: wrap the page's JSONP callback (`insert`) at the moment the
// JSONP request is issued, so that its argument is also stored in
// window.myAwesomeResponse.
casper.on("resource.requested", function (resource) {
    // filter on the correct call
    if (resource.url.indexOf(".jsonp") !== -1) {
        this.evaluate(function () {
            var oldInsert = insert;
            insert = function (json) {
                window.myAwesomeResponse = json;
                oldInsert.apply(window, arguments);
            };
        });
    }
});
// No .run() is chained onto the listener registration: the run is started
// exactly once, below, after start() has defined the steps.

casper.start(url).waitFor(getAwesomeResponse, function then() {
    // getAwesomeResponse uses `this.evaluate`, so it must be invoked with the
    // casper instance as `this` rather than as a bare function call.
    var raw = getAwesomeResponse.call(this);
    // A JSONP callback receives an already-parsed object; only parse when the
    // stored value is still a JSON string.
    var data = typeof raw === "string" ? JSON.parse(raw) : raw;
    // Do something with data
}).run();



