Dump Html Of Page Including Iframes
I'd like to dump the HTML contents of a web page, including the HTML of iframes included inside the
Solution 1:
You can use phantomjs to achieve this
Here is a code snippet from the phantom js server code.
var system = require('system');
var url = system.args[1] || '';
if(url.length > 0) {
var page = require('webpage').create();
page.open(url, function (status) {
if (status == 'success') {
var delay, checker = (function() {
var html = page.evaluate(function () {
var body = document.getElementsByTagName('body')[0];
if(body.getAttribute('data-status') == 'ready') {
returndocument.getElementsByTagName('html')[0].outerHTML;
}
});
if(html) {
clearTimeout(delay);
console.log(html);
phantom.exit();
}
});
delay = setInterval(checker, 100);
}
});
}
on the html you use the "data-status" attribute to let phantomjs know when the page is ready if the html belongs to you . The other option would be to use a nice timeout if the html page does not belong to you.
Post a Comment for "Dump Html Of Page Including Iframes"