extract-content-javascript flow
@@ -1,885 +1,29 @@
/*
* @title extract-content-javascript flow
- * @description extract-content-javascriptのテスト
+ * @description extract-content-javascript (forked)
* @include http://*
- * @license MIT License
- * @require
+ * @license MIT License http://www.opensource.org/licenses/mit-license
+ * @require https://raw.github.com/hatena/extract-content-javascript/master/lib/extract-content-all.js
+ * @contributor efcl http://let.hatelabo.jp/efcl/let/gYC-yqSn9_ytBw
+ * @author noromanba
*/
-// http://github.com/hatena/extract-content-javascript/
-if (typeof ExtractContentJS == 'undefined') {
- var ExtractContentJS = {};
-}
-if (typeof ExtractContentJS.Lib == 'undefined') {
- ExtractContentJS.Lib = {};
-}
-
-ExtractContentJS.Lib.Util = (function() {
- var Util = {};
- Util.BenchmarkTimer = function() {
- var now = function() {
- var d = new Date();
- var t = 0;
- t = d.getHours();
- t = t*60 + d.getMinutes();
- t = t*60 + d.getSeconds();
- t = t*1000 + d.getMilliseconds();
- return t;
- };
- var Timer = function() {
- var self = { elapsed: 0 };
- self.reset = function(){ self.elapsed = 0; return self };
- self.start = function(){ self.msec = now(); return self };
- self.stop = function() {
- self.elapsed += now() - self.msec;
- return self;
- };
- return self.start();
- };
-
- var self = { timers: {} };
- self.get = function(name) {
- if (!self.timers[name]) {
- self.timers[name] = new Timer();
- }
- return self.timers[name];
- };
- self.reset = function(name){ return self.get(name).reset(); };
- self.start = function(name){ return self.get(name).start(); };
- self.stop = function(name){ return self.get(name).stop(); };
- return self;
- };
- Util.Token = function(word) {
- var regex = {
- // hiragana: /[あ-んが-ぼぁ-ょゎっー]/,
- hiragana: /[\u3042-\u3093\u304C-\u307C\u3041-\u3087\u308E\u3063\u30FC]/,
- // katakana: /[ア-ンガ-ボァ-ョヮッー]/,
- katakana: /[\u30A2-\u30F3\u30AC-\u30DC\u30A1-\u30E7\u30EE\u30C3\u30FC]/,
- kanji: { test: function(w) {
- // return '一' <= w && w <= '龠' || w === '々';
- return '\u4E00' <= w && w <= '\u9FA0' || w === '\u3005';
- } },
- alphabet: /[a-zA-Z]/,
- digit: /[0-9]/
- };
- var tests = function(w){
- var match = {};
- for (var r in regex) {
- if (regex[r].test(w)) {
- match[r] = regex[r];
- }
- }
- return match;
- };
- var self = {
- first: tests(word.charAt(0)),
- last: tests(word.charAt(word.length-1))
- };
- self.isTokenized = function(prev, next) {
- var p = prev.length ? prev.charAt(prev.length-1) : '';
- var n = next.length ? next.charAt(0) : '';
- var check = function(w, test) {
- if (w.length) {
- for (var t in test) {
- if (test[t].test(w)) return false;
- }
- }
- return true;
- };
- return check(p, self.first) && check(n, self.last);
- };
-
- return self;
- };
- Util.inherit = function(child,parent) {
- var obj = child || {};
- for (var prop in parent) {
- if (typeof obj[prop] == 'undefined') {
- obj[prop] = parent[prop];
- }
- }
- return obj;
- };
- Util.countMatch = function(text, regex) {
- return text.split(regex).length - 1;
- // var n=0;
- // for (var i=0;;) {
- // i = text.search(regex);
- // if (i < 0) break;
- // n++;
- // text = text.substr(i+1);
- // }
- // return n;
- };
- Util.countMatchTokenized = function(text, word) {
- var count = 0;
- var prev = null;
- var tok = new Util.Token(word);
- var texts = text.split(word);
- var len = texts.length;
- for (var i=0; i < len; i++) {
- if (prev && tok.isTokenized(prev, texts[i])) count++;
- prev = texts[i]
- }
- return count;
- };
- Util.indexOfTokenized = function(text, word) {
- var index = text.indexOf(word);
- if (index >= 0) {
- var tok = new Util.Token(word);
- var p = index > 1 ? text.substr(index-1, 1) : '';
- var n = text.substr(index+word.length, 1);
- if (tok.isTokenized(p, n)) {
- return index;
- }
- }
- return -1;
- };
- Util.dump = function(obj) {
- if (typeof obj == 'undefined') return 'undefined';
- if (typeof obj == 'string') return '"' + obj + '"';
- if (typeof obj != 'object') return ''+obj;
- if (obj === null) return 'null';
- if (obj instanceof Array) {
- return '['
- + obj.map(function(v){return 'obj'/*Util.dump(v)*/;}).join(',')
- + ']';
- } else {
- var arr = [];
- for (var prop in obj) {
- arr.push(prop + ':' + 'obj'/*Util.dump(obj[prop])*/);
- }
- return '{' + arr.join(',') + '}';
- }
- };
- return Util;
-})();
-
-ExtractContentJS.Lib.A = (function() {
- var A = {};
- A.indexOf = Array.indexOf || function(self, elt/*, from*/) {
- var argi = 2;
- var len = self.length;
- var from = Number(arguments[argi++]) || 0;
- from = (from < 0) ? Math.ceil(from) : Math.floor(from);
- if (from < 0) from += len;
- for (; from < len; from++) {
- if (from in self && self[from] === elt) return from;
- }
- return -1;
- };
- A.filter = Array.filter || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != "function") {
- throw new TypeError('A.filter: not a function');
- }
- var rv = new Array();
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self) {
- var val = self[i]; // in case fun mutates this
- if (fun.call(thisp, val, i, self)) rv.push(val);
- }
- }
- return rv;
- };
- A.forEach = Array.forEach || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.forEach: not a function');
- }
- var thisp = arguments[argi++];
- for (var i=0; i < len; i++) {
- if (i in self) fun.call(thisp, self[i], i, self);
- }
- };
- A.every = Array.every || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.every: not a function');
- }
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self &&
- !fun.call(thisp, self[i], i, self)) {
- return false;
- }
- }
- return true;
- };
- A.map = Array.map || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.map: not a function');
- }
- var rv = new Array(len);
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self) {
- rv[i] = fun.call(thisp, self[i], i, self);
- }
- }
- return rv;
- };
- A.some = Array.some || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != "function") {
- throw new TypeError('A.some: not a function');
- }
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self &&
- fun.call(thisp, self[i], i, self)) {
- return true;
- }
- }
- return false;
- };
- A.reduce = Array.reduce || function(self, fun/*, initial*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw TypeError('A.reduce: not a function ');
- }
- var i = 0;
- var prev;
- if (arguments.length > argi) {
- var rv = arguments[argi++];
- } else {
- do {
- if (i in self) {
- rv = self[i++];
- break;
- }
- if (++i >= len) {
- throw new TypeError('A.reduce: empty array');
- }
- } while (true);
- }
- for (; i < len; i++) {
- if (i in self) rv = fun.call(null, rv, self[i], i, self);
- }
- return rv;
- };
- A.zip = function(self) {
- if (self[0] instanceof Array) {
- var l = self[0].length;
- var len = self.length;
- var z = new Array(l);
- for (var i=0; i < l; i++) {
- z[i] = [];
- for (var j=0; j < len; j++) {
- z[i].push(self[j][i]);
- }
- }
- return z;
- }
- return [];
- };
- A.first = function(self) {
- return self ? self[0] : null;
- };
- A.last = function(self) {
- return self ? self[self.length-1] : null;
- };
- A.push = function(self, other) {
- return Array.prototype.push.apply(self, other);
- };
- return A;
-})();
-
-ExtractContentJS.Lib.DOM = (function() {
- var A = ExtractContentJS.Lib.A;
- var DOM = {};
- DOM.getElementStyle = function(elem, prop) {
- var style = elem.style ? elem.style[prop] : null;
- if (!style) {
- var dv = elem.ownerDocument.defaultView;
- if (dv && dv.getComputedStyle) {
- try {
- var styles = dv.getComputedStyle(elem, null);
- } catch(e) {
- return null;
- }
- prop = prop.replace(/([A-Z])/g, '-$1').toLowerCase();
- style = styles ? styles.getPropertyValue(prop) : null;
- } else if (elem.currentStyle) {
- style = elem.currentStyle[prop];
- }
- }
- return style;
- };
- DOM.text = function(node) {
- if (typeof node.textContent != 'undefined') {
- return node.textContent;
- } else if (node.nodeName == '#text') {
- return node.nodeValue;
- } else if (typeof node.innerText != 'undefined') {
- return node.innerText; // IE
- }
- return null;
- };
- DOM.ancestors = function(e) {
- var body = e.ownerDocument.body;
- var r = [];
- var it = e;
- while (it != body) {
- r.push(it);
- it = it.parentNode;
- }
- r.push(body);
- return r; // [e .. document.body]
- };
- DOM.commonAncestor = function(e1, e2) {
- var a1 = DOM.ancestors(e1).reverse();
- var a2 = DOM.ancestors(e2).reverse();
- var r = null;
- for (var i=0; a1[i] && a2[i] && a1[i] == a2[i]; i++) {
- r = a1[i];
- }
- return r;
- };
- DOM.countMatchTagAttr = function(node, tag, attr, regexs) {
- var test = function(v){ return v.test(node[attr]); };
- if ((node.tagName||'').toLowerCase()==tag && A.some(regexs,test)) {
- return 1;
- }
- var n=0;
- var children = node.childNodes;
- for (var i=0, len=children.length; i < len; i++) {
- n += DOM.countMatchTagAttr(children[i], tag, attr, regexs);
- }
- return n;
- };
- DOM.matchTag = function(node, pat) {
- return A.some(pat, function(v){
- if (typeof v == 'string') {
- return v == (node.tagName||'').toLowerCase();
- } else if (v instanceof Array) {
- return v[0] == (node.tagName||'').toLowerCase()
- && DOM.matchAttr(node, v[1]);
- } else {
- return false;
- }
- });
- };
- DOM.matchAttr = function(node, pat) {
- var test = function(pat, val) {
- if (typeof pat == 'string') {
- return pat == val;
- } else if (pat instanceof RegExp) {
- return pat.test(val);
- } else if (pat instanceof Array) {
- return A.some(pat,function(v){return test(v,val);});
- } else if (pat instanceof Object) {
- for (var prop in pat) {
- var n = node[prop];
- if (n && DOM.matchAttr(n, pat[prop])) {
- return true;
- }
- }
- }
- return false;
- };
- for (var prop in pat) {
- var attr = node[prop];
- var ar = pat[prop];
- if (attr) {
- return test(ar, attr);
- }
- }
- return false;
- };
- DOM.matchStyle = function(node, pat) {
- var test = function(pat, val) {
- if (typeof pat == 'string') {
- return pat == val;
- } else if (pat instanceof RegExp) {
- return pat.test(val);
- } else if (pat instanceof Array) {
- return A.some(pat,function(v){return test(v,val);});
- }
- return false;
- };
- for (var prop in pat) {
- if (test(pat[prop], DOM.getElementStyle(node, prop))) {
- return true;
- }
- }
- return false;
- };
- return DOM;
-})();
-
-if (typeof ExtractContentJS == 'undefined') {
- var ExtractContentJS = {};
-}
-(function(ns) {
- var Util = ns.Lib.Util;
- var A = ns.Lib.A;
- var DOM = ns.Lib.DOM;
+// # Changes
+// - <at>require loading
+// - sugared Array wrapper
+// - x100 quicken
+// - invert dotted outline style
- var Leaf = Util.inherit(function(node/*, depth, inside, limit*/) {
- var depth = arguments[1] || 0;
- var inside = arguments[2] || {};
- var limit = arguments[3] || 1048576;
- var leaf = { node: node, depth: depth, inside: inside };
-
- leaf.statistics = function() {
- var t = (DOM.text(node) || '').replace(/\s+/g, ' ');
- var l = t.length;
- return {
- text: t.substr(0, limit),
- noLinkText: (inside.link || inside.form) ? '' : t,
- listTextLength: inside.list ? l : 0,
- noListTextLength: inside.list ? 0 : l,
- linkCount: inside.link ? 1 : 0,
- listCount: inside.li ? 1 : 0,
- linkListCount: (inside.li && inside.link) ? 1 : 0
- };
- };
-
- return leaf;
- }, {
- commonAncestor: function(/* leaves */) {
- var ar = A.map(arguments, function(v){ return v.node; });
- if (ar.length < 2) {
- return ar[0];
- }
- return A.reduce(ar, function(prev, curr) {
- return DOM.commonAncestor(prev, curr);
- });
- },
- mergeStatistics: function(a, b) {
- var r = {};
- for (var prop in a) {
- r[prop] = a[prop] + b[prop];
- }
- return r;
- }
+// http://github.com/hatena/extract-content-javascript/
+(function () {
+ var ex = new ExtractContentJS.LayeredExtractor();
+ ex.addHandler(ex.factory.getHandler('Heuristics'));
+ var res = ex.extract(document);
+ var main = res.content.asNode(); // ? not use below
+ var maintree = res.content.asLeaves();
+ Array.prototype.forEach.call(maintree, function (tree, idx) {
+ setTimeout(function () {
+ tree.node.parentNode.setAttribute('style', 'outline-style: dotted; outline-width: 1px; outline-color: invert');
+ }, 10 * idx);
});
-
- var Block = function(leaves) {
- leaves = A.filter(leaves, function(v) {
- var s = DOM.text(v.node) || '';
- s = s.replace(/\s+/g, '');
- return s.length != 0;
- });
- var block = { score: 0, leaves: leaves };
- block.commonAncestor = function() {
- return Leaf.commonAncestor.apply(null, block.leaves);
- };
- return block;
- };
-
- var Content = function(c) {
- var self = { _content: c };
-
- self.asLeaves = function(){ return self._content; };
- self.asNode = function() {
- if (self._node) return self._node;
- self._node = Leaf.commonAncestor.apply(null, self._content);
- return self._node;
- };
- self.asTextFragment = function() {
- if (self._textFragment) return self._textFragment;
- if (self._content.length < 1) return '';
- self._textFragment = A.reduce(self._content, function(prev,curr) {
- var s = DOM.text(curr.node);
- s = s.replace(/^\s+/g,'').replace(/\s+$/g,'');
- s = s.replace(/\s+/g,' ');
- return prev + s;
- }, '');
- return self._textFragment;
- };
- self.asText = function() {
- if (self._text) return self._text;
- // covering node
- var node = self.asNode();
- self._text = node ? DOM.text(node) : '';
- return self._text;
- };
- self.toString = function() {
- return self.asTextFragment();
- };
-
- return self;
- };
-
- ns.LayeredExtractor = function(/* handler, filter */) {
- var self = { handler: arguments[0] || [], filter: arguments[1] || {} };
-
- self.factory = {
- getHandler: function(name) {
- if (typeof ns.LayeredExtractor.Handler != 'undefined') {
- return new ns.LayeredExtractor.Handler[name];
- }
- return null;
- }
- };
-
- self.addHandler = function(handler) {
- if (typeof handler != 'undefined') {
- self.handler.push(handler);
- }
- return self;
- };
-
- self.filterFor = function(url) {
- // TODO
- };
-
- self.extract = function(d) {
- var url = d.location.href;
- var res = { title: d.title, url: d.location.href };
- var len = self.handler.length;
- for (var i=0; i < len; i++) {
- var content = self.handler[i].extract(d, url, res);
- if (!content) continue;
-
- var f = self.filterFor(url);
- if (f) {
- content = f.filter(content);
- }
-
- content = new Content(content);
- if (!content.toString().length) continue;
- res.content = content;
- res.isSuccess = true;
- res.engine = res.engine || self.handler[i];
- break;
- }
- return res;
- };
-
- return self;
- };
- ns.LayeredExtractor.Handler = {};
-
- ns.LayeredExtractor.Handler.Heuristics = function(/*option, pattern*/) {
- var self = {
- name: 'Heuristics',
- content: [],
- opt: Util.inherit(arguments[0], {
- threshold: 60,
- minLength: 30,
- factor: {
- decay: 0.75,
- noBody: 0.72,
- continuous: 1.16//1.62
- },
- punctuationWeight: 10,
- minNoLink: 8,
- noListRatio: 0.2,
- limit: {
- leaves: 800,
- recursion: 20,
- text: 1048576
- },
- debug: false
- }),
- pat: Util.inherit(arguments[1], {
- sep: [
- 'div', 'center', 'td',
- 'h1', 'h2'
- ],
- waste: [
- /Copyright|All\s*Rights?\s*Reserved?/i
- ],
- affiliate: [
- /amazon[a-z0-9\.\/\-\?&]+-22/i
- ],
- list: [ 'ul', 'dl', 'ol' ],
- li: [ 'li', 'dd' ],
- a: [ 'a' ],
- form: [ 'form' ],
- noContent: [ 'frameset' ],
- ignore: [
- 'iframe',
- 'img',
- 'script',
- 'style',
- 'select',
- 'noscript',
- [ 'div', {
- id: [ /more/, /menu/, /side/, /navi/ ],
- className: [ /more/, /menu/, /side/, /navi/ ]
- } ]
- ],
- ignoreStyle: {
- display: 'none',
- visibility: 'hidden'
- },
- // punctuations: /[。、.,!?]|\.[^A-Za-z0-9]|,[^0-9]|!|\?/
- punctuations: /[\u3002\u3001\uFF0E\uFF0C\uFF01\uFF1F]|\.[^A-Za-z0-9]|,[^0-9]|!|\?/
- })
- };
-
- var MyBlock = Util.inherit(function(leaves) {
- var block = new Block(leaves);
-
- block.eliminateLinks = function() {
- var st = A.map(block.leaves, function(v){
- return v.statistics();
- });
- if (!st.length) return '';
- if (st.length == 1) {
- st = st[0];
- } else {
- st = A.reduce(st, function(prev, curr) {
- return Leaf.mergeStatistics(prev, curr);
- });
- }
-
- var nolinklen = st.noLinkText.length;
- var links = st.linkCount;
- var listlen = st.listTextLength;
- if (nolinklen < self.opt.minNoLink * links) {
- return '';
- }
-
- // isLinklist
- var rate = st.linkListCount / (st.listCount || 1);
- rate *= rate;
- var limit = self.opt.noListRatio * rate * listlen;
- if (nolinklen < limit) {
- return '';
- }
-
- return st.noLinkText;
- };
- block.noBodyRate = function() {
- var val = 0;
- if (block.leaves.length > 0) {
- val += A.reduce(block.leaves, function(prev, curr) {
- return prev
- + DOM.countMatchTagAttr(curr.node, 'a', 'href',
- self.pat.affiliate);
- }, 0);
- }
- val /= 2.0;
- val += A.reduce(self.pat.waste, function(prev,curr) {
- return prev + Util.countMatch(block._nolink, curr);
- }, 0);
- return val;
- };
-
- block.calcScore = function(factor, continuous) {
- // ignore link list block
- block._nolink = block.eliminateLinks();
- if (block._nolink.length < self.opt.minLength) return 0;
-
- var c = Util.countMatch(block._nolink, self.pat.punctuations);
- c *= self.opt.punctuationWeight;
- c += block._nolink.length;
- c *= factor;
-
- // anti-scoring factors
- var noBodyRate = block.noBodyRate();
-
- // scores
- c *= Math.pow(self.opt.factor.noBody, noBodyRate);
- block._c = block.score = c;
- block._c1 = c * continuous;
- return c;
- };
-
- block.isAccepted = function() {
- return block._c > self.opt.threshold;
- };
-
- block.isContinuous = function() {
- return block._c1 > self.opt.threshold;
- };
-
- block.merge = function(other) {
- block.score += other._c1;
- block.depth = Math.min(block.depth, other.depth);
- A.push(block.leaves, other.leaves);
- return block;
- };
-
- return block;
- }, {
- split: function(node) {
- var r = [];
- var buf = [];
- var leaves = 0;
- var limit = self.opt.limit.text;
-
- var flush = function(flag) {
- if (flag && buf.length) {
- r.push(new MyBlock(buf));
- buf = [];
- }
- };
-
- var rec = function(node, depth, inside) {
- // depth-first recursion
- if (leaves >= self.opt.limit.leaves) return r;
- if (depth >= self.opt.limit.recursion) return r;
- if (node.nodeName == '#comment') return r;
- if (DOM.matchTag(node, self.pat.ignore)) return r;
- if (DOM.matchStyle(node, self.pat.ignoreStyle)) return r;
- var children = node.childNodes;
- var sep = self.pat.sep;
- var len = children.length;
- var flags = {
- form: inside.form || DOM.matchTag(node, self.pat.form),
- link: inside.link || DOM.matchTag(node, self.pat.a),
- list: inside.list || DOM.matchTag(node, self.pat.list),
- li: inside.li || DOM.matchTag(node, self.pat.li)
- };
- for (var i=0; i < len; i++) {
- var c = children[i];
- var f = DOM.matchTag(c, sep);
- flush(f);
- rec(c, depth+1, flags);
- flush(f);
- }
- if (!len) {
- leaves++;
- buf.push(new Leaf(node, depth, flags, limit));
- }
- return r;
- };
-
- rec(node, 0, {});
- flush(true);
-
- return r;
- }
- });
-
- self.extract = function(d/*, url, res*/) {
- var isNoContent = function(v){
- return d.getElementsByTagName(v).length != 0;
- };
- if (A.some(self.pat.noContent, isNoContent)) return self;
-
- var factor = 1.0;
- var continuous = 1.0;
- var score = 0;
-
- var res = [];
- var blocks = MyBlock.split(d.body);
- var last;
-
- var len = blocks.length;
- for (var i=0; i < len; i++) {
- var block = blocks[i];
- if (last) {
- continuous /= self.opt.factor.continuous;
- }
-
- // score
- if (!block.calcScore(factor, continuous)) continue;
- factor *= self.opt.factor.decay;
-
- // clustor scoring
- if (block.isAccepted()) {
- if (block.isContinuous() && last) {
- last.merge(block);
- } else {
- last = block;
- res.push(block);
- }
- continuous = self.opt.factor.continuous;
- } else { // rejected
- if (!last) {
- // do not decay if no block is pushed
- factor = 1.0
- }
- }
- }
-
- self.blocks = res.sort(function(a,b){return b.score-a.score;});
- var best = A.first(self.blocks);
- if (best) {
- self.content = best.leaves;
- }
-
- return self.content;
- };
-
- return self;
- };
-
- ns.LayeredExtractor.Handler.GoogleAdSection = function(/*opt*/) {
- var self = {
- name: 'GoogleAdSection',
- content: [],
- state: [],
- opt: Util.inherit(arguments[0], {
- limit: {
- leaves: 800,
- recursion: 20
- },
- debug: false
- })
- };
-
- var pat = {
- ignore: /google_ad_section_start\(weight=ignore\)/i,
- section: /google_ad_section_start/i,
- end: /google_ad_section_end/i
- };
- var stIgnore = 1;
- var stSection = 2;
-
- self.inSection = function(){return A.last(self.state)==stSection;};
- self.ignore = function(){self.state.push(stIgnore);}
- self.section = function(){self.state.push(stSection);}
- self.end = function(){ if (self.state.length) self.state.pop(); };
- self.parse = function(node/*, depth*/) {
- var depth = arguments[1] || 0;
- if (node.nodeName == '#comment') {
- if (pat.ignore.test(node.nodeValue)) {
- self.ignore();
- } else if (pat.section.test(node.nodeValue)) {
- self.section();
- } else if (pat.end.test(node.nodeValue)) {
- self.end();
- }
- return;
- }
-
- if (self.content.length >= self.opt.limit.leaves) return;
- if (depth >= self.opt.limit.recursion) return;
- var children = node.childNodes;
- var len = children.length;
- for (var i=0; i < len; i++) {
- var c = children[i];
- self.parse(c, depth+1);
- }
- if (!len && self.inSection()) {
- self.content.push(new Leaf(node, depth));
- }
- return;
- };
-
- self.extract = function(d/*, url, res*/) {
- self.parse(d);
- self.blocks = [ new Block(self.content) ];
- return self.content;
- };
-
- return self;
- };
-})(ExtractContentJS);
-// こっから
-var ex = new ExtractContentJS.LayeredExtractor();
-ex.addHandler( ex.factory.getHandler('Heuristics') );
-var res = ex.extract(document);
-var main = res.content.asNode();
-var maintree = res.content.asLeaves();
-for(var i=0;i<maintree.length;i++) (function(node){
- setTimeout(function(){
- node.parentNode.setAttribute("style" ,"outline-style: solid; outline-width: 1px; outline-color: #0000ff");
- } , 1000*i);
-})(maintree[i].node);
+})();
/*
* @title extract-content-javascript flow
* @description extract-content-javascript (forked)
* @include http://*
* @license MIT License http://www.opensource.org/licenses/mit-license
* @require https://raw.github.com/hatena/extract-content-javascript/master/lib/extract-content-all.js
* @contributor efcl http://let.hatelabo.jp/efcl/let/gYC-yqSn9_ytBw
* @author noromanba
*/
// # Changes
// - <at>require loading
// - sugared Array wrapper
// - 100x faster highlighting (10 ms per leaf instead of 1000 ms)
// - invert dotted outline style
// http://github.com/hatena/extract-content-javascript/
/**
 * Bookmarklet entry point: runs the ExtractContentJS 'Heuristics' handler on
 * the current document and draws a dotted outline around the parent element
 * of every extracted leaf, one leaf every 10 ms.
 *
 * Relies on the globals `ExtractContentJS` (loaded through the @require
 * metadata line) and `document`.
 */
(function () {
    var ex = new ExtractContentJS.LayeredExtractor();
    ex.addHandler(ex.factory.getHandler('Heuristics'));
    var res = ex.extract(document);

    // extract() may hand back a result without `content` when no handler
    // found any main text; nothing to highlight in that case.
    if (!res || !res.content) {
        return;
    }

    var maintree = res.content.asLeaves();
    Array.prototype.forEach.call(maintree, function (tree, idx) {
        // Stagger the highlighting so the outlines appear progressively.
        setTimeout(function () {
            var parent = tree.node.parentNode;
            if (!parent || !parent.style) {
                return;
            }
            // Set the outline properties one by one instead of overwriting
            // the whole `style` attribute, so inline styles already present
            // on the element are preserved. They are kept separate (not the
            // `outline` shorthand) so that a browser rejecting the `invert`
            // colour still applies the style and width.
            parent.style.outlineStyle = 'dotted';
            parent.style.outlineWidth = '1px';
            parent.style.outlineColor = 'invert';
        }, 10 * idx);
    });
})();
- Permalink
- このページへの個別リンクです。
- RAW
- 書かれたコードへの直接のリンクです。
- Packed
- 文字列が圧縮された書かれたコードへのリンクです。
- Userscript
- Greasemonkey 等で利用する場合の .user.js へのリンクです。
- Loader
- @require やソースコードが長い場合に多段ロードする Loader コミのコードへのリンクです。
- Metadata
- コード中にコメントで @xxx と書かれたメタデータの JSON です。