Skip to content

Commit

Permalink
all new x-ray
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewmueller committed Feb 4, 2015
1 parent 427e7f2 commit 6377062
Show file tree
Hide file tree
Showing 10 changed files with 195 additions and 679 deletions.
492 changes: 0 additions & 492 deletions index.js

Large diffs are not rendered by default.

35 changes: 0 additions & 35 deletions lib/adapters/curl.js

This file was deleted.

60 changes: 0 additions & 60 deletions lib/adapters/phantom.js

This file was deleted.

36 changes: 36 additions & 0 deletions lib/request.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/**
* Module Dependencies
*/

var Superagent = require('superagent');

/**
* Export the default `driver`
*/

module.exports = driver;

/**
 * Create the default request `driver`.
 *
 * The driver builds a superagent agent from `opts` and returns a plugin.
 * The plugin installs a `request(url, fn)` method on the x-ray instance
 * it is given. That method issues a GET request for `url` and calls back
 * with `fn(err)` on failure or `fn(null, text)` with the response text
 * on success.
 *
 * @param {Object} opts options handed to `Superagent.agent`
 * @return {Function} plugin
 */

function driver(opts) {
  var agent = Superagent.agent(opts);

  return function plugin(xray) {
    // a fresh `request` is created for every x-ray instance the plugin is applied to
    function request(url, fn) {
      agent.get(url, function(err, res) {
        // only touch `res` when the request did not fail
        return err ? fn(err) : fn(null, res.text);
      });
    }

    xray.request = request;
    return xray;
  };
}
59 changes: 41 additions & 18 deletions lib/x-ray.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
*/

var debug = require('debug')('x-ray');
var curl = require('./adapters/curl.js');
var assign = require('object-assign');
var Select = require('x-ray-select');
var request = require('./request');
var cheerio = require('cheerio');
var isArray = Array.isArray;
var noop = function() {};
Expand Down Expand Up @@ -74,12 +73,14 @@ Xray.prototype.run = function(fn) {

// each selection
function next(json) {
debug('appending json');
out = out.concat(json);
pop = first && !isArray(json) ? true : false;
first = false;
}

function done(err) {
debug('finishing up');
if (err) return fn(err);

// if we only have one item and it's not an array,
Expand All @@ -92,26 +93,38 @@ Xray.prototype.run = function(fn) {

/**
* Write the selections produced by `traverse` to a stream, framed as a
* JSON array, and end the stream when traversal finishes.
*
* NOTE(review): this body reads like a rendered diff whose removed and
* added lines are interleaved without +/- markers (two `var written`
* initialisers, two `var str` assignments, two `if (err)` checks).
* Confirm which line of each pair is current before relying on it.
*
* @param {String|Stream} file a path (opened with `fs.createWriteStream`) or a stream used as-is
* @return {Stream} the stream being written to
*/

Xray.prototype.write = function(file) {
// a string is treated as a path; anything else is assumed to already be a stream
// NOTE(review): `fs` is not among the requires visible in this view — confirm it is imported
var stream = 'string' == typeof file ? fs.createWriteStream(file) : file;
// NOTE(review): this initialiser is immediately overwritten by the `false` one below
var written = true;
// tracks whether anything has been written to the stream yet
var written = false;

this.traverse(next, done);

return stream;

// called with each selection's json
function next(json) {
// NOTE(review): this value is replaced by the second `var str` assignment below
var str = JSON.stringify(json, true, 2);
debug('writing to the stream');
// slice(1, -1) drops the first and last characters of the serialized text
// (the enclosing brackets when `json` is an array)
var str = JSON.stringify(json, true, 2).slice(1, -1);
// the first write opens the array; later writes are separated by a comma
if (!written) stream.write('[\n' + str);
else stream.write('\n,\n' + str);
written = true;
}

// called once traversal completes or fails
function done(err) {
// NOTE(review): as written, this throw makes the error branch below unreachable
if (err) throw err;
if (err) {
stream.emit('error', err);
stream.end();
return;
}

debug('finishing up');
// close the array only if something was written
if (written) stream.write('\n]\n');
stream.end();
return;
}
}

Expand All @@ -120,48 +133,58 @@ Xray.prototype.write = function(file) {
*/

Xray.prototype.traverse = function(fn, done) {
var request = this.request ? this.request : (this.request = this.use(curl()).request);
var limit = this.paginateEl ? this._limit : 1;
var paginate = this.paginateEl;
var throws = this._throws;
var get = this.request ? this.request : (this.request = this.use(request()).request);
var limit = this._paginate ? this._limit : 1;
var paginate = this._paginate;
var selects = this.selects;
var throws = this._throws;
var url = this.url;
var self = this;

// initial request
request(url, next);
debug('initial request: %s', url);
get(url, next);

function next(err, body) {
if (err && throws) return done(err);
console.log(body);
debug('received response');

var $ = load(body);
var select = Select($.html());
var select = Select($);
var json = select(selects);
var href = select(paginate);

// check the pagination
if (--limit <= 0) return fn(json), done(null);
var href = select(paginate);
if (!href) return fn(json), done(null);
if (--limit <= 0) {
debug('reached limit, finishing up.');
return fn(json), done(null);
}

if (!href) {
debug('no next page, finishing up.')
return fn(json), done(null);
}

// callback and continue
fn(json);

// delay
setTimeout(function() {
debug('next page: %s', href);
request(href, next);
debug('requesting next page: %s', href);
get(href, next);
}, self.delay());
}

function load(body) {
debug('loading body');
var $;

// sanitize
try {
$ = cheerio.load(body);
$ = absolutes(url, noscript($));
} catch (e) {
debug('load error: %s', e.message);
if (throws) return done(e);
}

Expand Down Expand Up @@ -194,7 +217,7 @@ Xray.prototype.throws = function(throws) {
Xray.prototype.delay = function delay(from, to) {
if (arguments.length) {
this.from = from;
this.to = to;
this.to = to || from;
return this;
} else {
return Math.floor(Math.random() * this.to) + this.from;
Expand Down
10 changes: 2 additions & 8 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,9 @@
},
"dependencies": {
"cheerio": "^0.17.0",
"component-type": "^1.1.0",
"debug": "^2.0.0",
"delegates": "^0.1.0",
"extend.js": "0.0.2",
"lodash": "^2.4.1",
"nightmare": "^1.6.5",
"object-assign": "^2.0.0",
"traverse": "^0.6.6",
"x-ray-select": "^1.0.0"
"superagent": "^0.21.0",
"x-ray-select": "^1.0.2"
},
"devDependencies": {
"mocha": "*",
Expand Down
21 changes: 21 additions & 0 deletions test/fixtures/paginate.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
var assert = require('assert');

// Selector spec for the pagination fixture: a one-element array holding an
// object that maps output keys to CSS selector strings, with a nested
// `meta` object.
// NOTE(review): `$root` presumably scopes the sibling selectors to each
// matched element — confirm against x-ray-select.
exports.input = [{
$root: '.repo-list-item',
title: '.repo-list-name',
link: '.repo-list-name a[href]',
description: '.repo-list-description',
meta: {
$root: '.repo-list-meta',
starredOn: 'time'
}
}];

exports.expected = function(arr) {
assert(arr.length >= 60, 'array length (' + arr.length + ') not large enough');
arr.map(function(arr) {
assert(~arr.link.indexOf('https://github.com/'), 'invalid link');
assert(arr.title.length, 'title not there');
assert(arr.meta.starredOn.length, 'starred on not there');
})
}
20 changes: 20 additions & 0 deletions test/fixtures/select-keys.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Selector spec for the select-keys fixture: a one-element array holding an
// object that maps output keys to CSS selector strings, with a nested
// `content` object and an array-wrapped selector for `tags`.
// NOTE(review): `$root` presumably scopes the sibling selectors, and the
// array form presumably collects every match — confirm against x-ray-select.
exports.input = [{
$root: ".item",
link: 'a[href]',
thumb: 'img[src]',
content: {
$root: '.item-content',
title: 'h2',
body: 'section'
},
tags: ['.item-tags li']
}];

// Expected output for the select-keys fixture; its keys mirror those of
// `exports.input` (link, thumb, content.title, content.body, tags).
// NOTE(review): this is a single object although `input` is an array —
// presumably the test compares against one selected item; confirm in the test.
exports.expected = {
link: 'http://ift.tt/1xIsboY',
thumb: 'http://www.google.com/s2/favicons?domain=http://ift.tt/1xIsboY',
content:
{ title: 'The 100 Best Children\'s Books of All Time',
body: 'Relive your childhood with TIME\'s list of the best 100 children\'s books of all time http://t.co/NEvBhNM4np http://ift.tt/1sk3xdM\n\n— TIME.com (@TIME) January 11, 2015' },
tags: [ 'twitter' ]
}
Empty file removed test/index.js
Empty file.
Loading

0 comments on commit 6377062

Please sign in to comment.