w

    @@ -2,7 +2,8 @@ * @title w * @description テキストwwwをw面白くwwするwwww * @include http://* - * @license MIT License + * @license new BSD license. + * @javascript_url */ // TinySegmenter 0.1 -- Super compact Japanese tokenizer in Javascript
  • /*
     * @title w
     * @description テキストwwwをw面白くwwするwwww
     * @include http://*
     * @license new BSD license.
     * @javascript_url
     */
    
    // TinySegmenter 0.1 -- Super compact Japanese tokenizer in Javascript
    // (c) 2008 Taku Kudo <taku@chasen.org>
    // TinySegmenter is freely distributable under the terms of a new BSD licence.
    // For details, see http://chasen.org/~taku/software/TinySegmenter/LICENCE.txt
    
    (function() {
      /**
       * Builds a segmenter instance: the character-type classifier table plus
       * every feature-weight table that segment() sums into a boundary score.
       *
       * Instance properties set here:
       *  - chartype_: array of [RegExp, code] pairs, tried in order by ctype_().
       *  - BIAS__:    starting score for every candidate boundary.
       *  - UW1__..UW6__ / BW1__..BW3__ / TW1__..TW4__: weights keyed by one, two
       *    or three neighbouring characters (the "B1"/"E1"/"E2" keys are the
       *    padding symbols segment() puts around the input).
       *  - UP*, BP*: weights keyed by segment()'s p1..p3 history values.
       *  - BC*, TC*, UC*, BQ*, TQ*, UQ*: keyed by character-type codes, with the
       *    Q tables carrying an extra leading letter - presumably a history
       *    value; their lookups are outside this constructor, so confirm
       *    against segment().
       *
       * Full-width and half-width characters are written as \u escapes so that
       * Unicode normalisation of this file cannot fold them into their ASCII /
       * full-width katakana counterparts (which silently turns them into
       * duplicate keys and redundant character ranges).
       *
       * @constructor
       */
      function TinySegmenter() {
        // Character class pattern -> character-type code. Insertion order is the
        // match priority, so the numeral class "M" must stay ahead of the general
        // kanji class "H" that also contains those characters.
        var patterns = {
          "[一二三四五六七八九十百千万億兆]": "M",
          "[一-龠々〆ヵヶ]": "H",
          "[ぁ-ん]": "I",
          // Katakana, incl. half-width forms U+FF71-U+FF9E and U+FF70; the
          // combining voiced mark U+3099 is kept from the previous pattern.
          "[ァ-ヴー\u3099\uFF71-\uFF9E\uFF70]": "K",
          // ASCII letters plus full-width a-z (U+FF41-FF5A) and A-Z (U+FF21-FF3A).
          "[a-zA-Z\uFF41-\uFF5A\uFF21-\uFF3A]": "A",
          // ASCII digits plus full-width digits (U+FF10-FF19).
          "[0-9\uFF10-\uFF19]": "N"
        };
        this.chartype_ = [];
        for (var pattern in patterns) {
          // RegExp.prototype.compile is deprecated; construct the regexp directly.
          this.chartype_.push([new RegExp(pattern), patterns[pattern]]);
        }

        this.BIAS__ = -332;
        this.BC1__ = {
          "HH": 6,
          "II": 2461,
          "KH": 406,
          "OH": -1378
        };
        this.BC2__ = {
          "AA": -3267,
          "AI": 2744,
          "AN": -878,
          "HH": -4070,
          "HM": -1711,
          "HN": 4012,
          "HO": 3761,
          "IA": 1327,
          "IH": -1184,
          "II": -1332,
          "IK": 1721,
          "IO": 5492,
          "KI": 3831,
          "KK": -8741,
          "MH": -3132,
          "MK": 3334,
          "OO": -2920
        };
        this.BC3__ = {
          "HH": 996,
          "HI": 626,
          "HK": -721,
          "HN": -1307,
          "HO": -836,
          "IH": -301,
          "KK": 2762,
          "MK": 1079,
          "MM": 4034,
          "OA": -1652,
          "OH": 266
        };
        this.BP1__ = {
          "BB": 295,
          "OB": 304,
          "OO": -125,
          "UB": 352
        };
        this.BP2__ = {
          "BO": 60,
          "OO": -1762
        };
        this.BQ1__ = {
          "BHH": 1150,
          "BHM": 1521,
          "BII": -1158,
          "BIM": 886,
          "BMH": 1208,
          "BNH": 449,
          "BOH": -91,
          "BOO": -2597,
          "OHI": 451,
          "OIH": -296,
          "OKA": 1851,
          "OKH": -1020,
          "OKK": 904,
          "OOO": 2965
        };
        this.BQ2__ = {
          "BHH": 118,
          "BHI": -1159,
          "BHM": 466,
          "BIH": -919,
          "BKK": -1720,
          "BKO": 864,
          "OHH": -1139,
          "OHM": -181,
          "OIH": 153,
          "UHI": -1146
        };
        this.BQ3__ = {
          "BHH": -792,
          "BHI": 2664,
          "BII": -299,
          "BKI": 419,
          "BMH": 937,
          "BMM": 8335,
          "BNN": 998,
          "BOH": 775,
          "OHH": 2174,
          "OHM": 439,
          "OII": 280,
          "OKH": 1798,
          "OKI": -793,
          "OKO": -2242,
          "OMH": -2402,
          "OOO": 11699
        };
        this.BQ4__ = {
          "BHH": -3895,
          "BIH": 3761,
          "BII": -4654,
          "BIK": 1348,
          "BKK": -1806,
          "BMI": -3385,
          "BOO": -12396,
          "OAH": 926,
          "OHH": 266,
          "OHK": -2036,
          "ONN": -973
        };
        this.BW1__ = {
          ",と": 660,
          ",同": 727,
          "B1あ": 1404,
          "B1同": 542,
          "、と": 660,
          "、同": 727,
          "」と": 1682,
          "あっ": 1505,
          "いう": 1743,
          "いっ": -2055,
          "いる": 672,
          "うし": -4817,
          "うん": 665,
          "から": 3472,
          "がら": 600,
          "こう": -790,
          "こと": 2083,
          "こん": -1262,
          "さら": -4143,
          "さん": 4573,
          "した": 2641,
          "して": 1104,
          "すで": -3399,
          "そこ": 1977,
          "それ": -871,
          "たち": 1122,
          "ため": 601,
          "った": 3463,
          "つい": -802,
          "てい": 805,
          "てき": 1249,
          "でき": 1127,
          "です": 3445,
          "では": 844,
          "とい": -4915,
          "とみ": 1922,
          "どこ": 3887,
          "ない": 5713,
          "なっ": 3015,
          "など": 7379,
          "なん": -1113,
          "にし": 2468,
          "には": 1498,
          "にも": 1671,
          "に対": -912,
          "の一": -501,
          "の中": 741,
          "ませ": 2448,
          "まで": 1711,
          "まま": 2600,
          "まる": -2155,
          "やむ": -1947,
          "よっ": -2565,
          "れた": 2369,
          "れで": -913,
          "をし": 1860,
          "を見": 731,
          "亡く": -1886,
          "京都": 2558,
          "取り": -2784,
          "大き": -2604,
          "大阪": 1497,
          "平方": -2314,
          "引き": -1336,
          "日本": -195,
          "本当": -2423,
          "毎日": -2113,
          "目指": -724,
          // Full-width / half-width variants of keys above.
          "\uFF22\uFF11あ": 1404,
          "\uFF22\uFF11同": 542,
          "\uFF63と": 1682
        };
        this.BW2__ = {
          "..": -11822,
          "11": -669,
          "――": -5730,
          "−−": -13175,
          "いう": -1609,
          "うか": 2490,
          "かし": -1350,
          "かも": -602,
          "から": -7194,
          "かれ": 4612,
          "がい": 853,
          "がら": -3198,
          "きた": 1941,
          "くな": -1597,
          "こと": -8392,
          "この": -4193,
          "させ": 4533,
          "され": 13168,
          "さん": -3977,
          "しい": -1819,
          "しか": -545,
          "した": 5078,
          "して": 972,
          "しな": 939,
          "その": -3744,
          "たい": -1253,
          "たた": -662,
          "ただ": -3857,
          "たち": -786,
          "たと": 1224,
          "たは": -939,
          "った": 4589,
          "って": 1647,
          "っと": -2094,
          "てい": 6144,
          "てき": 3640,
          "てく": 2551,
          "ては": -3110,
          "ても": -3065,
          "でい": 2666,
          "でき": -1528,
          "でし": -3828,
          "です": -4761,
          "でも": -4203,
          "とい": 1890,
          "とこ": -1746,
          "とと": -2279,
          "との": 720,
          "とみ": 5168,
          "とも": -3941,
          "ない": -2488,
          "なが": -1313,
          "など": -6509,
          "なの": 2614,
          "なん": 3099,
          "にお": -1615,
          "にし": 2748,
          "にな": 2454,
          "によ": -7236,
          "に対": -14943,
          "に従": -4688,
          "に関": -11388,
          "のか": 2093,
          "ので": -7059,
          "のに": -6041,
          "のの": -6125,
          "はい": 1073,
          "はが": -1033,
          "はず": -2532,
          "ばれ": 1813,
          "まし": -1316,
          "まで": -6621,
          "まれ": 5409,
          "めて": -3153,
          "もい": 2230,
          "もの": -10713,
          "らか": -944,
          "らし": -1611,
          "らに": -1897,
          "りし": 651,
          "りま": 1620,
          "れた": 4270,
          "れて": 849,
          "れば": 4114,
          "ろう": 6067,
          "われ": 7901,
          "を通": -11877,
          "んだ": 728,
          "んな": -4115,
          "一人": 602,
          "一方": -1375,
          "一日": 970,
          "一部": -1051,
          "上が": -4479,
          "会社": -1116,
          "出て": 2163,
          "分の": -7758,
          "同党": 970,
          "同日": -913,
          "大阪": -2471,
          "委員": -1250,
          "少な": -1050,
          "年度": -8669,
          "年間": -1626,
          "府県": -2363,
          "手権": -1982,
          "新聞": -4066,
          "日新": -722,
          "日本": -7068,
          "日米": 3372,
          "曜日": -601,
          "朝鮮": -2355,
          "本人": -2697,
          "東京": -1543,
          "然と": -1384,
          "社会": -1276,
          "立て": -990,
          "第に": -1612,
          "米国": -4268,
          // Full-width variant of "11".
          "\uFF11\uFF11": -669
        };
        this.BW3__ = {
          "あた": -2194,
          "あり": 719,
          "ある": 3846,
          "い.": -1185,
          "い。": -1185,
          "いい": 5308,
          "いえ": 2079,
          "いく": 3029,
          "いた": 2056,
          "いっ": 1883,
          "いる": 5600,
          "いわ": 1527,
          "うち": 1117,
          "うと": 4798,
          "えと": 1454,
          "か.": 2857,
          "か。": 2857,
          "かけ": -743,
          "かっ": -4098,
          "かに": -669,
          "から": 6520,
          "かり": -2670,
          "が,": 1816,
          "が、": 1816,
          "がき": -4855,
          "がけ": -1127,
          "がっ": -913,
          "がら": -4977,
          "がり": -2064,
          "きた": 1645,
          "けど": 1374,
          "こと": 7397,
          "この": 1542,
          "ころ": -2757,
          "さい": -714,
          "さを": 976,
          "し,": 1557,
          "し、": 1557,
          "しい": -3714,
          "した": 3562,
          "して": 1449,
          "しな": 2608,
          "しま": 1200,
          "す.": -1310,
          "す。": -1310,
          "する": 6521,
          "ず,": 3426,
          "ず、": 3426,
          "ずに": 841,
          "そう": 428,
          "た.": 8875,
          "た。": 8875,
          "たい": -594,
          "たの": 812,
          "たり": -1183,
          "たる": -853,
          "だ.": 4098,
          "だ。": 4098,
          "だっ": 1004,
          "った": -4748,
          "って": 300,
          "てい": 6240,
          "てお": 855,
          "ても": 302,
          "です": 1437,
          "でに": -1482,
          "では": 2295,
          "とう": -1387,
          "とし": 2266,
          "との": 541,
          "とも": -3543,
          "どう": 4664,
          "ない": 1796,
          "なく": -903,
          "など": 2135,
          "に,": -1021,
          "に、": -1021,
          "にし": 1771,
          "にな": 1906,
          "には": 2644,
          "の,": -724,
          "の、": -724,
          "の子": -1000,
          "は,": 1337,
          "は、": 1337,
          "べき": 2181,
          "まし": 1113,
          "ます": 6943,
          "まっ": -1549,
          "まで": 6154,
          "まれ": -793,
          "らし": 1479,
          "られ": 6820,
          "るる": 3818,
          "れ,": 854,
          "れ、": 854,
          "れた": 1850,
          "れて": 1375,
          "れば": -3246,
          "れる": 1091,
          "われ": -605,
          "んだ": 606,
          "んで": 798,
          "カ月": 990,
          "会議": 860,
          "入り": 1232,
          "大会": 2217,
          "始め": 1681,
          "市": 965,
          "新聞": -5055,
          "日,": 974,
          "日、": 974,
          "社会": 2024,
          // Half-width katakana variant of the "カ月" key.
          "\uFF76月": 990
        };
        this.TC1__ = {
          "AAA": 1093,
          "HHH": 1029,
          "HHM": 580,
          "HII": 998,
          "HOH": -390,
          "HOM": -331,
          "IHI": 1169,
          "IOH": -142,
          "IOI": -1015,
          "IOM": 467,
          "MMH": 187,
          "OOI": -1832
        };
        this.TC2__ = {
          "HHO": 2088,
          "HII": -1023,
          "HMM": -1154,
          "IHI": -1965,
          "KKH": 703,
          "OII": -2649
        };
        this.TC3__ = {
          "AAA": -294,
          "HHH": 346,
          "HHI": -341,
          "HII": -1088,
          "HIK": 731,
          "HOH": -1486,
          "IHH": 128,
          "IHI": -3041,
          "IHO": -1935,
          "IIH": -825,
          "IIM": -1035,
          "IOI": -542,
          "KHH": -1216,
          "KKA": 491,
          "KKH": -1217,
          "KOK": -1009,
          "MHH": -2694,
          "MHM": -457,
          "MHO": 123,
          "MMH": -471,
          "NNH": -1689,
          "NNO": 662,
          "OHO": -3393
        };
        this.TC4__ = {
          "HHH": -203,
          "HHI": 1344,
          "HHK": 365,
          "HHM": -122,
          "HHN": 182,
          "HHO": 669,
          "HIH": 804,
          "HII": 679,
          "HOH": 446,
          "IHH": 695,
          "IHO": -2324,
          "IIH": 321,
          "III": 1497,
          "IIO": 656,
          "IOO": 54,
          "KAK": 4845,
          "KKA": 3386,
          "KKK": 3065,
          "MHH": -405,
          "MHI": 201,
          "MMH": -241,
          "MMM": 661,
          "MOM": 841
        };
        this.TQ1__ = {
          "BHHH": -227,
          "BHHI": 316,
          "BHIH": -132,
          "BIHH": 60,
          "BIII": 1595,
          "BNHH": -744,
          "BOHH": 225,
          "BOOO": -908,
          "OAKK": 482,
          "OHHH": 281,
          "OHIH": 249,
          "OIHI": 200,
          "OIIH": -68
        };
        this.TQ2__ = {
          "BIHH": -1401,
          "BIII": -1033,
          "BKAK": -543,
          "BOOO": -5591
        };
        this.TQ3__ = {
          "BHHH": 478,
          "BHHM": -1073,
          "BHIH": 222,
          "BHII": -504,
          "BIIH": -116,
          "BIII": -105,
          "BMHI": -863,
          "BMHM": -464,
          "BOMH": 620,
          "OHHH": 346,
          "OHHI": 1729,
          "OHII": 997,
          "OHMH": 481,
          "OIHH": 623,
          "OIIH": 1344,
          "OKAK": 2792,
          "OKHH": 587,
          "OKKA": 679,
          "OOHH": 110,
          "OOII": -685
        };
        this.TQ4__ = {
          "BHHH": -721,
          "BHHM": -3604,
          "BHII": -966,
          "BIIH": -607,
          "BIII": -2181,
          "OAAA": -2763,
          "OAKK": 180,
          "OHHH": -294,
          "OHHI": 2446,
          "OHHO": 480,
          "OHIH": -1573,
          "OIHH": 1935,
          "OIHI": -493,
          "OIIH": 626,
          "OIII": -4007,
          "OKAK": -8156
        };
        this.TW1__ = {
          "につい": -4681,
          "東京都": 2026
        };
        this.TW2__ = {
          "ある程": -2049,
          "いった": -1256,
          "ころが": -2434,
          "しょう": 3873,
          "その後": -4430,
          "だって": -1049,
          "ていた": 1833,
          "として": -4657,
          "ともに": -4517,
          "もので": 1882,
          "一気に": -792,
          "初めて": -1512,
          "同時に": -8097,
          "大きな": -1255,
          "対して": -2721,
          "社会党": -3216
        };
        this.TW3__ = {
          "いただ": -1734,
          "してい": 1314,
          "として": -4314,
          "につい": -5483,
          "にとっ": -5989,
          "に当た": -6247,
          "ので,": -727,
          "ので、": -727,
          "のもの": -600,
          "れから": -3752,
          "十二月": -2287
        };
        this.TW4__ = {
          "いう.": 8576,
          "いう。": 8576,
          "からな": -2348,
          "してい": 2958,
          "たが,": 1516,
          "たが、": 1516,
          "ている": 1538,
          "という": 1349,
          "ました": 5543,
          "ません": 1097,
          "ようと": -4258,
          "よると": 5865
        };
        this.UC1__ = {
          "A": 484,
          "K": 93,
          "M": 645,
          "O": -505
        };
        this.UC2__ = {
          "A": 819,
          "H": 1059,
          "I": 409,
          "M": 3987,
          "N": 5775,
          "O": 646
        };
        this.UC3__ = {
          "A": -1370,
          "I": 2311
        };
        this.UC4__ = {
          "A": -2643,
          "H": 1809,
          "I": -1032,
          "K": -3450,
          "M": 3565,
          "N": 3876,
          "O": 6646
        };
        this.UC5__ = {
          "H": 313,
          "I": -1238,
          "K": -799,
          "M": 539,
          "O": -831
        };
        this.UC6__ = {
          "H": -506,
          "I": -253,
          "K": 87,
          "M": 247,
          "O": -387
        };
        this.UP1__ = {
          "O": -214
        };
        this.UP2__ = {
          "B": 69,
          "O": 935
        };
        this.UP3__ = {
          "B": 189
        };
        this.UQ1__ = {
          "BH": 21,
          "BI": -12,
          "BK": -99,
          "BN": 142,
          "BO": -56,
          "OH": -95,
          "OI": 477,
          "OK": 410,
          "OO": -2422
        };
        this.UQ2__ = {
          "BH": 216,
          "BI": 113,
          "OK": 1759
        };
        this.UQ3__ = {
          "BA": -479,
          "BH": 42,
          "BI": 1913,
          "BK": -7198,
          "BM": 3160,
          "BN": 6427,
          "BO": 14761,
          "OI": -827,
          "ON": -3212
        };
        this.UW1__ = {
          ",": 156,
          "、": 156,
          "「": -463,
          "あ": -941,
          "う": -127,
          "が": -553,
          "き": 121,
          "こ": 505,
          "で": -201,
          "と": -547,
          "ど": -123,
          "に": -789,
          "の": -185,
          "は": -847,
          "も": -466,
          "や": -470,
          "よ": 182,
          "ら": -292,
          "り": 208,
          "れ": 169,
          "を": -446,
          "ん": -137,
          "・": -135,
          "主": -402,
          "京": -268,
          "区": -912,
          "午": 871,
          "国": -460,
          "大": 561,
          "委": 729,
          "市": -411,
          "日": -141,
          "理": 361,
          "生": -408,
          "県": -386,
          "都": -718,
          // Half-width variants of keys above.
          "\uFF62": -463,
          "\uFF65": -135
        };
        this.UW2__ = {
          ",": -829,
          "、": -829,
          "〇": 892,
          "「": -645,
          "」": 3145,
          "あ": -538,
          "い": 505,
          "う": 134,
          "お": -502,
          "か": 1454,
          "が": -856,
          "く": -412,
          "こ": 1141,
          "さ": 878,
          "ざ": 540,
          "し": 1529,
          "す": -675,
          "せ": 300,
          "そ": -1011,
          "た": 188,
          "だ": 1837,
          "つ": -949,
          "て": -291,
          "で": -268,
          "と": -981,
          "ど": 1273,
          "な": 1063,
          "に": -1764,
          "の": 130,
          "は": -409,
          "ひ": -1273,
          "べ": 1261,
          "ま": 600,
          "も": -1263,
          "や": -402,
          "よ": 1639,
          "り": -579,
          "る": -694,
          "れ": 571,
          "を": -2516,
          "ん": 2095,
          "ア": -587,
          "カ": 306,
          "キ": 568,
          "ッ": 831,
          "三": -758,
          "不": -2150,
          "世": -302,
          "中": -968,
          "主": -861,
          "事": 492,
          "人": -123,
          "会": 978,
          "保": 362,
          "入": 548,
          "初": -3025,
          "副": -1566,
          "北": -3414,
          "区": -422,
          "大": -1769,
          "天": -865,
          "太": -483,
          "子": -1519,
          "学": 760,
          "実": 1023,
          "小": -2009,
          "市": -813,
          "年": -1060,
          "強": 1067,
          "手": -1519,
          "揺": -1033,
          "政": 1522,
          "文": -1355,
          "新": -1682,
          "日": -1815,
          "明": -1462,
          "最": -630,
          "朝": -1843,
          "本": -1650,
          "東": -931,
          "果": -665,
          "次": -2378,
          "民": -180,
          "気": -1740,
          "理": 752,
          "発": 529,
          "目": -1584,
          "相": -242,
          "県": -1165,
          "立": -763,
          "第": 810,
          "米": 509,
          "自": -1353,
          "行": 838,
          "西": -744,
          "見": -3874,
          "調": 1010,
          "議": 1198,
          "込": 3041,
          "開": 1758,
          "間": -1257,
          // Half-width variants of keys above.
          "\uFF62": -645,
          "\uFF63": 3145,
          "\uFF6F": 831,
          "\uFF71": -587,
          "\uFF76": 306,
          "\uFF77": 568
        };
        this.UW3__ = {
          ",": 4889,
          "1": -800,
          "−": -1723,
          "、": 4889,
          "々": -2311,
          "〇": 5827,
          "」": 2670,
          "〓": -3573,
          "あ": -2696,
          "い": 1006,
          "う": 2342,
          "え": 1983,
          "お": -4864,
          "か": -1163,
          "が": 3271,
          "く": 1004,
          "け": 388,
          "げ": 401,
          "こ": -3552,
          "ご": -3116,
          "さ": -1058,
          "し": -395,
          "す": 584,
          "せ": 3685,
          "そ": -5228,
          "た": 842,
          "ち": -521,
          "っ": -1444,
          "つ": -1081,
          "て": 6167,
          "で": 2318,
          "と": 1691,
          "ど": -899,
          "な": -2788,
          "に": 2745,
          "の": 4056,
          "は": 4555,
          "ひ": -2171,
          "ふ": -1798,
          "へ": 1199,
          "ほ": -5516,
          "ま": -4384,
          "み": -120,
          "め": 1205,
          "も": 2323,
          "や": -788,
          "よ": -202,
          "ら": 727,
          "り": 649,
          "る": 5905,
          "れ": 2773,
          "わ": -1207,
          "を": 6620,
          "ん": -518,
          "ア": 551,
          "グ": 1319,
          "ス": 874,
          "ッ": -1350,
          "ト": 521,
          "ム": 1109,
          "ル": 1591,
          "ロ": 2201,
          "ン": 278,
          "・": -3794,
          "一": -1619,
          "下": -1759,
          "世": -2087,
          "両": 3815,
          "中": 653,
          "主": -758,
          "予": -1193,
          "二": 974,
          "人": 2742,
          "今": 792,
          "他": 1889,
          "以": -1368,
          "低": 811,
          "何": 4265,
          "作": -361,
          "保": -2439,
          "元": 4858,
          "党": 3593,
          "全": 1574,
          "公": -3030,
          "六": 755,
          "共": -1880,
          "円": 5807,
          "再": 3095,
          "分": 457,
          "初": 2475,
          "別": 1129,
          "前": 2286,
          "副": 4437,
          "力": 365,
          "動": -949,
          "務": -1872,
          "化": 1327,
          "北": -1038,
          "区": 4646,
          "千": -2309,
          "午": -783,
          "協": -1006,
          "口": 483,
          "右": 1233,
          "各": 3588,
          "合": -241,
          "同": 3906,
          "和": -837,
          "員": 4513,
          "国": 642,
          "型": 1389,
          "場": 1219,
          "外": -241,
          "妻": 2016,
          "学": -1356,
          "安": -423,
          "実": -1008,
          "家": 1078,
          "小": -513,
          "少": -3102,
          "州": 1155,
          "市": 3197,
          "平": -1804,
          "年": 2416,
          "広": -1030,
          "府": 1605,
          "度": 1452,
          "建": -2352,
          "当": -3885,
          "得": 1905,
          "思": -1291,
          "性": 1822,
          "戸": -488,
          "指": -3973,
          "政": -2013,
          "教": -1479,
          "数": 3222,
          "文": -1489,
          "新": 1764,
          "日": 2099,
          "旧": 5792,
          "昨": -661,
          "時": -1248,
          "曜": -951,
          "最": -937,
          "月": 4125,
          "期": 360,
          "李": 3094,
          "村": 364,
          "東": -805,
          "核": 5156,
          "森": 2438,
          "業": 484,
          "氏": 2613,
          "民": -1694,
          "決": -1073,
          "法": 1868,
          "海": -495,
          "無": 979,
          "物": 461,
          "特": -3850,
          "生": -273,
          "用": 914,
          "町": 1215,
          "的": 7313,
          "直": -1835,
          "省": 792,
          "県": 6293,
          "知": -1528,
          "私": 4231,
          "税": 401,
          "立": -960,
          "第": 1201,
          "米": 7767,
          "系": 3066,
          "約": 3663,
          "級": 1384,
          "統": -4229,
          "総": 1163,
          "線": 1255,
          "者": 6457,
          "能": 725,
          "自": -2869,
          "英": 785,
          "見": 1044,
          "調": -562,
          "財": -733,
          "費": 1777,
          "車": 1835,
          "軍": 1375,
          "込": -1504,
          "通": -1136,
          "選": -681,
          "郎": 1026,
          "郡": 4404,
          "部": 1200,
          "金": 2163,
          "長": 421,
          "開": -1432,
          "間": 1302,
          "関": -1282,
          "雨": 2009,
          "電": -1045,
          "非": 2066,
          "駅": 1620,
          // Full-width / half-width variants of keys above.
          "\uFF11": -800,
          "\uFF63": 2670,
          "\uFF65": -3794,
          "\uFF6F": -1350,
          "\uFF71": 551,
          "\uFF78\uFF9E": 1319,
          "\uFF7D": 874,
          "\uFF84": 521,
          "\uFF91": 1109,
          "\uFF99": 1591,
          "\uFF9B": 2201,
          "\uFF9D": 278
        };
        this.UW4__ = {
          ",": 3930,
          ".": 3508,
          "―": -4841,
          "、": 3930,
          "。": 3508,
          "〇": 4999,
          "「": 1895,
          "」": 3798,
          "〓": -5156,
          "あ": 4752,
          "い": -3435,
          "う": -640,
          "え": -2514,
          "お": 2405,
          "か": 530,
          "が": 6006,
          "き": -4482,
          "ぎ": -3821,
          "く": -3788,
          "け": -4376,
          "げ": -4734,
          "こ": 2255,
          "ご": 1979,
          "さ": 2864,
          "し": -843,
          "じ": -2506,
          "す": -731,
          "ず": 1251,
          "せ": 181,
          "そ": 4091,
          "た": 5034,
          "だ": 5408,
          "ち": -3654,
          "っ": -5882,
          "つ": -1659,
          "て": 3994,
          "で": 7410,
          "と": 4547,
          "な": 5433,
          "に": 6499,
          "ぬ": 1853,
          "ね": 1413,
          "の": 7396,
          "は": 8578,
          "ば": 1940,
          "ひ": 4249,
          "び": -4134,
          "ふ": 1345,
          "へ": 6665,
          "べ": -744,
          "ほ": 1464,
          "ま": 1051,
          "み": -2082,
          "む": -882,
          "め": -5046,
          "も": 4169,
          "ゃ": -2666,
          "や": 2795,
          "ょ": -1544,
          "よ": 3351,
          "ら": -2922,
          "り": -9726,
          "る": -14896,
          "れ": -2613,
          "ろ": -4570,
          "わ": -1783,
          "を": 13150,
          "ん": -2352,
          "カ": 2145,
          "コ": 1789,
          "セ": 1287,
          "ッ": -724,
          "ト": -403,
          "メ": -1635,
          "ラ": -881,
          "リ": -541,
          "ル": -856,
          "ン": -3637,
          "・": -4371,
          "ー": -11870,
          "一": -2069,
          "中": 2210,
          "予": 782,
          "事": -190,
          "井": -1768,
          "人": 1036,
          "以": 544,
          "会": 950,
          "体": -1286,
          "作": 530,
          "側": 4292,
          "先": 601,
          "党": -2006,
          "共": -1212,
          "内": 584,
          "円": 788,
          "初": 1347,
          "前": 1623,
          "副": 3879,
          "力": -302,
          "動": -740,
          "務": -2715,
          "化": 776,
          "区": 4517,
          "協": 1013,
          "参": 1555,
          "合": -1834,
          "和": -681,
          "員": -910,
          "器": -851,
          "回": 1500,
          "国": -619,
          "園": -1200,
          "地": 866,
          "場": -1410,
          "塁": -2094,
          "士": -1413,
          "多": 1067,
          "大": 571,
          "子": -4802,
          "学": -1397,
          "定": -1057,
          "寺": -809,
          "小": 1910,
          "屋": -1328,
          "山": -1500,
          "島": -2056,
          "川": -2667,
          "市": 2771,
          "年": 374,
          "庁": -4556,
          "後": 456,
          "性": 553,
          "感": 916,
          "所": -1566,
          "支": 856,
          "改": 787,
          "政": 2182,
          "教": 704,
          "文": 522,
          "方": -856,
          "日": 1798,
          "時": 1829,
          "最": 845,
          "月": -9066,
          "木": -485,
          "来": -442,
          "校": -360,
          "業": -1043,
          "氏": 5388,
          "民": -2716,
          "気": -910,
          "沢": -939,
          "済": -543,
          "物": -735,
          "率": 672,
          "球": -1267,
          "生": -1286,
          "産": -1101,
          "田": -2900,
          "町": 1826,
          "的": 2586,
          "目": 922,
          "省": -3485,
          "県": 2997,
          "空": -867,
          "立": -2112,
          "第": 788,
          "米": 2937,
          "系": 786,
          "約": 2171,
          "経": 1146,
          "統": -1169,
          "総": 940,
          "線": -994,
          "署": 749,
          "者": 2145,
          "能": -730,
          "般": -852,
          "行": -792,
          "規": 792,
          "警": -1184,
          "議": -244,
          "谷": -1000,
          "賞": 730,
          "車": -1481,
          "軍": 1158,
          "輪": -1433,
          "込": -3370,
          "近": 929,
          "道": -1291,
          "選": 2596,
          "郎": -4866,
          "都": 1192,
          "野": -1100,
          "銀": -2213,
          "長": 357,
          "間": -2344,
          "院": -2297,
          "際": -2604,
          "電": -878,
          "領": -1659,
          "題": -792,
          "館": -1984,
          "首": 1749,
          "高": 2120,
          // Half-width variants of keys above.
          "\uFF62": 1895,
          "\uFF63": 3798,
          "\uFF65": -4371,
          "\uFF6F": -724,
          "\uFF70": -11870,
          "\uFF76": 2145,
          "\uFF7A": 1789,
          "\uFF7E": 1287,
          "\uFF84": -403,
          "\uFF92": -1635,
          "\uFF97": -881,
          "\uFF98": -541,
          "\uFF99": -856,
          "\uFF9D": -3637
        };
        this.UW5__ = {
          ",": 465,
          ".": -299,
          "1": -514,
          "E2": -32768,
          "]": -2762,
          "、": 465,
          "。": -299,
          "「": 363,
          "あ": 1655,
          "い": 331,
          "う": -503,
          "え": 1199,
          "お": 527,
          "か": 647,
          "が": -421,
          "き": 1624,
          "ぎ": 1971,
          "く": 312,
          "げ": -983,
          "さ": -1537,
          "し": -1371,
          "す": -852,
          "だ": -1186,
          "ち": 1093,
          "っ": 52,
          "つ": 921,
          "て": -18,
          "で": -850,
          "と": -127,
          "ど": 1682,
          "な": -787,
          "に": -1224,
          "の": -635,
          "は": -578,
          "べ": 1001,
          "み": 502,
          "め": 865,
          "ゃ": 3350,
          "ょ": 854,
          "り": -208,
          "る": 429,
          "れ": 504,
          "わ": 419,
          "を": -1264,
          "ん": 327,
          "イ": 241,
          "ル": 451,
          "ン": -343,
          "中": -871,
          "京": 722,
          "会": -1153,
          "党": -654,
          "務": 3519,
          "区": -901,
          "告": 848,
          "員": 2104,
          "大": -1296,
          "学": -548,
          "定": 1785,
          "嵐": -1304,
          "市": -2991,
          "席": 921,
          "年": 1763,
          "思": 872,
          "所": -814,
          "挙": 1618,
          "新": -1682,
          "日": 218,
          "月": -4353,
          "査": 932,
          "格": 1356,
          "機": -1508,
          "氏": -1347,
          "田": 240,
          "町": -3912,
          "的": -3149,
          "相": 1319,
          "省": -1052,
          "県": -4003,
          "研": -997,
          "社": -278,
          "空": -813,
          "統": 1955,
          "者": -2233,
          "表": 663,
          "語": -1073,
          "議": 1219,
          "選": -1018,
          "郎": -368,
          "長": 786,
          "間": 1191,
          "題": 2368,
          "館": -689,
          // Full-width / half-width variants of keys above.
          "\uFF11": -514,
          "\uFF25\uFF12": -32768,
          "\uFF62": 363,
          "\uFF72": 241,
          "\uFF99": 451,
          "\uFF9D": -343
        };
        this.UW6__ = {
          ",": 227,
          ".": 808,
          "1": -270,
          "E1": 306,
          "、": 227,
          "。": 808,
          "あ": -307,
          "う": 189,
          "か": 241,
          "が": -73,
          "く": -121,
          "こ": -200,
          "じ": 1782,
          "す": 383,
          "た": -428,
          "っ": 573,
          "て": -1014,
          "で": 101,
          "と": -105,
          "な": -253,
          "に": -149,
          "の": -417,
          "は": -236,
          "も": -206,
          "り": 187,
          "る": -135,
          "を": 195,
          "ル": -673,
          "ン": -496,
          "一": -277,
          "中": 201,
          "件": -800,
          "会": 624,
          "前": 302,
          "区": 1792,
          "員": -1212,
          "委": 798,
          "学": -960,
          "市": 887,
          "広": -695,
          "後": 535,
          "業": -697,
          "相": 753,
          "社": -507,
          "福": 974,
          "空": -822,
          "者": 1811,
          "連": 463,
          "郎": 1082,
          // Full-width / half-width variants of keys above.
          "\uFF11": -270,
          "\uFF25\uFF11": 306,
          "\uFF99": -673,
          "\uFF9D": -496
        };

        return this;
      }
    
      /**
       * Returns the character-type code for a piece of text by trying each
       * [RegExp, code] pair in this.chartype_ in order.
       *
       * @param {string} str Text to classify; segment() passes one character.
       * @return {string} Code of the first pair whose pattern matches, or "O"
       *     when no pattern matches.
       */
      TinySegmenter.prototype.ctype_ = function(str) {
        var table = this.chartype_;
        // Indexed loop instead of for...in: this script runs inside arbitrary
        // pages, and for...in would also visit enumerable properties a page has
        // added to Array.prototype, yielding undefined instead of "O".
        for (var idx = 0; idx < table.length; ++idx) {
          if (str.match(table[idx][0])) {
            return table[idx][1];
          }
        }
        return "O";
      };
    
      /**
       * Turns a weight-table lookup into something safe to add to a score:
       * a missing (or otherwise falsy) entry contributes 0.
       *
       * @param {*} v Looked-up value, possibly undefined.
       * @return {*} v itself when it is truthy, otherwise 0.
       */
      TinySegmenter.prototype.ts_ = function(v) {
        return v ? v : 0;
      };
    
      /**
       * Splits text into segments.
       *
       * The input is walked character by character. At every position after
       * the first, a score is built from `this.BIAS__` plus lookups in the
       * feature tables on `this`. The lookups are keyed by the surrounding
       * characters (w1..w6), their character types (c1..c6, from `ctype_`)
       * and the three previous boundary decisions (p1..p3). A positive score
       * starts a new segment at that position.
       *
       * @param {string} input Text to segment.
       * @return {Array.<string>} Segments in input order; their concatenation
       *     equals the input. Empty array for null, undefined or "".
       */
      TinySegmenter.prototype.segment = function(input) {
        // Loose `== ""` is kept deliberately so every value the previous
        // check treated as empty (e.g. 0 or false) still yields [].
        if (input == null || input == "") {
          return [];
        }
        var result = [];
        // Three sentinel entries pad each end so the w1..w6 / c1..c6 window
        // is always defined.
        var seg = ["B3", "B2", "B1"];
        var ctype = ["O", "O", "O"];
        // Split by code point, not UTF-16 code unit: a surrogate pair must
        // stay one element, otherwise a boundary could be placed between its
        // halves and the returned segments would contain lone surrogates.
        var chars = input.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\s\S]/g);
        for (var k = 0; k < chars.length; ++k) {
          seg.push(chars[k]);
          ctype.push(this.ctype_(chars[k]));
        }
        seg.push("E1");
        seg.push("E2");
        seg.push("E3");
        ctype.push("O");
        ctype.push("O");
        ctype.push("O");
        // seg[3] is the first real character; it always opens the first segment.
        var word = seg[3];
        // Previous three boundary decisions: "U" until a decision has been
        // made, then "B" for a boundary and "O" for none.
        var p1 = "U";
        var p2 = "U";
        var p3 = "U";
        for (var i = 4; i < seg.length - 3; ++i) {
          var score = this.BIAS__;
          var w1 = seg[i - 3];
          var w2 = seg[i - 2];
          var w3 = seg[i - 1];
          var w4 = seg[i];
          var w5 = seg[i + 1];
          var w6 = seg[i + 2];
          var c1 = ctype[i - 3];
          var c2 = ctype[i - 2];
          var c3 = ctype[i - 1];
          var c4 = ctype[i];
          var c5 = ctype[i + 1];
          var c6 = ctype[i + 2];
          // Features on the previous decisions.
          score += this.ts_(this.UP1__[p1]);
          score += this.ts_(this.UP2__[p2]);
          score += this.ts_(this.UP3__[p3]);
          score += this.ts_(this.BP1__[p1 + p2]);
          score += this.ts_(this.BP2__[p2 + p3]);
          // Character unigram / bigram / trigram features.
          score += this.ts_(this.UW1__[w1]);
          score += this.ts_(this.UW2__[w2]);
          score += this.ts_(this.UW3__[w3]);
          score += this.ts_(this.UW4__[w4]);
          score += this.ts_(this.UW5__[w5]);
          score += this.ts_(this.UW6__[w6]);
          score += this.ts_(this.BW1__[w2 + w3]);
          score += this.ts_(this.BW2__[w3 + w4]);
          score += this.ts_(this.BW3__[w4 + w5]);
          score += this.ts_(this.TW1__[w1 + w2 + w3]);
          score += this.ts_(this.TW2__[w2 + w3 + w4]);
          score += this.ts_(this.TW3__[w3 + w4 + w5]);
          score += this.ts_(this.TW4__[w4 + w5 + w6]);
          // Character-type unigram / bigram / trigram features.
          score += this.ts_(this.UC1__[c1]);
          score += this.ts_(this.UC2__[c2]);
          score += this.ts_(this.UC3__[c3]);
          score += this.ts_(this.UC4__[c4]);
          score += this.ts_(this.UC5__[c5]);
          score += this.ts_(this.UC6__[c6]);
          score += this.ts_(this.BC1__[c2 + c3]);
          score += this.ts_(this.BC2__[c3 + c4]);
          score += this.ts_(this.BC3__[c4 + c5]);
          score += this.ts_(this.TC1__[c1 + c2 + c3]);
          score += this.ts_(this.TC2__[c2 + c3 + c4]);
          score += this.ts_(this.TC3__[c3 + c4 + c5]);
          score += this.ts_(this.TC4__[c4 + c5 + c6]);
          // A TC5__ lookup on (c4 + c5 + c6) was already commented out in
          // this code and stays disabled.
          // Features combining previous decisions with character types.
          score += this.ts_(this.UQ1__[p1 + c1]);
          score += this.ts_(this.UQ2__[p2 + c2]);
          score += this.ts_(this.UQ3__[p3 + c3]);
          score += this.ts_(this.BQ1__[p2 + c2 + c3]);
          score += this.ts_(this.BQ2__[p2 + c3 + c4]);
          score += this.ts_(this.BQ3__[p3 + c2 + c3]);
          score += this.ts_(this.BQ4__[p3 + c3 + c4]);
          score += this.ts_(this.TQ1__[p2 + c1 + c2 + c3]);
          score += this.ts_(this.TQ2__[p2 + c2 + c3 + c4]);
          score += this.ts_(this.TQ3__[p3 + c1 + c2 + c3]);
          score += this.ts_(this.TQ4__[p3 + c2 + c3 + c4]);
          var p = "O";
          if (score > 0) {
            // Boundary before seg[i]: close the current segment.
            result.push(word);
            word = "";
            p = "B";
          }
          p1 = p2;
          p2 = p3;
          p3 = p;
          word += seg[i];
        }
        result.push(word);

        return result;
      };
    
      // Bookmarklet entry point: rewrite every text node under <body> so that
      // each segment returned by the segmenter is followed by a random run of
      // one to four "w" characters.
      var segmenter = new TinySegmenter();
      var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, {
        acceptNode : function (node) {
          // Text inside <script> and <style> is code, not prose; appending
          // "w" to it would corrupt inline styles, so leave those nodes alone.
          var parent = node.parentNode;
          var tag = parent ? String(parent.nodeName).toUpperCase() : '';
          if (tag === 'SCRIPT' || tag === 'STYLE') {
            return NodeFilter.FILTER_REJECT;
          }
          return NodeFilter.FILTER_ACCEPT;
        }
      // The fourth argument is the legacy entityReferenceExpansion flag; it
      // is kept unchanged for older DOM implementations.
      }, true);

      // An explicit node variable replaces the former `with (walker.currentNode)`:
      // `with` is a SyntaxError in strict mode and hides which object
      // `nodeValue` is read from and written to.
      while (walker.nextNode()) {
        var textNode = walker.currentNode;
        var segs = segmenter.segment(textNode.nodeValue);
        var decorated = '';
        for (var s = 0; s < segs.length; s++) {
          var laughs = '';
          // Math.floor(Math.random() * 4) + 1 gives 1, 2, 3 or 4.
          for (var n = 0, count = Math.floor(Math.random() * 4) + 1; n < count; n++) {
            laughs = laughs + 'w';
          }
          decorated = decorated + segs[s] + laughs;
        }
        textNode.nodeValue = decorated;
      }
    }());
    
  • Permalink
    このページへの個別リンクです。
    RAW
    書かれたコードへの直接のリンクです。
    Packed
    文字列が圧縮された書かれたコードへのリンクです。
    Userscript
    Greasemonkey 等で利用する場合の .user.js へのリンクです。
    Loader
    @require を使う場合やソースコードが長い場合に多段ロードする、Loader 込みのコードへのリンクです。
    Metadata
    コード中にコメントで @xxx と書かれたメタデータの JSON です。

History

  1. 2014/12/05 12:44:49 - 2014-12-05
  2. 2014/12/04 19:46:09 - 2014-12-04
  3. 2014/12/04 17:36:47 - 2014-12-04
  4. 2014/12/04 17:33:51 - 2014-12-04
  5. 2014/12/04 17:02:52 - 2014-12-04
  6. 2014/12/04 16:59:17 - 2014-12-04
  7. 2014/12/04 16:50:15 - 2014-12-04
  8. 2014/12/04 16:04:18 - 2014-12-04
  9. 2014/12/04 16:03:33 - 2014-12-04
  10. 2014/12/04 16:00:07 - 2014-12-04