{"_id":"readabilitySAX","_rev":"92-dd813fd302161b0dc0bc8b26ac001a2d","name":"readabilitySAX","dist-tags":{"latest":"1.6.1"},"versions":{"0.1.0":{"name":"readabilitySAX","version":"0.1.0","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@0.1.0","dist":{"shasum":"5b6081466b97d8aea7d6bb70ebc7ec69e9bd4aef","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-0.1.0.tgz","integrity":"sha512-HfCW60oK9EYJMiNcPCJEet4VREcZ0ZKPqArO+DEaKIhmhHKb6kV8fQz8t8pp/W4jo0JhkWDN0Cpom/tH1ErTCQ==","signatures":[{"sig":"MEUCIH/VLauaFY2Wia2HgvuzwenaBrO1Lf24+vVdsQKyCmyXAiEA8rs8IRVDFdrFdjCDaDKIOEwv1MdWrHVE2UHpKGRUuxk=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./readabilitySAX.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./node_examples/mkread.js"},"_npmVersion":"1.0.1rc5","description":"the readability script ported to a sax parser","directories":{},"_nodeVersion":"v0.4.10","dependencies":{"sax":">= 0.2.2"},"_defaultsLoaded":true,"_engineSupported":true},"0.1.1":{"name":"readabilitySAX","version":"0.1.1","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@0.1.1","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"f739815ecd9bac7f704edf16d6b8096837f299f4","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-0.1.1.tgz","integrity":"sha512-oIviwWpBOOyn2VUcmixo8fVuoUQHQBQ6bANqiCmXamJS0M4I8KAe8ZM3P4Hai2o4NGxjC6hBA53yl9qgkWOGzA==","signatures":[{"sig":"MEQCIFvktD97uA8+XJfwEBhh18ljARJQQScVIr4sQ/Ngw45dAiB4TU47s0nbDY3eTsVwGzyXX0NaTjTBzl4+O/QCdt+B+g==","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./readabilitySAX.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./node_examples/mkread.js"},"_npmVersion":"1.0.1rc5","description":"the readability script ported to a sax parser","directories":{},"_nodeVersion":"v0.4.10","dependencies":{"sax":">= 0.2.2"},"_defaultsLoaded":true,"_engineSupported":true},"0.2.0":{"name":"readabilitySAX","version":"0.2.0","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@0.2.0","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"6f0b8c4335d9fb22411fafec7e33a956da3ade93","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-0.2.0.tgz","integrity":"sha512-IrEJTh1A/qqQ3ElcqrYl7GfGQRsxHaxJYizOGZqMRsKdVbSiG+nANRnR3va4HyJ2BfnO2XlelUePEIMELmGm0A==","signatures":[{"sig":"MEUCIQC84SRysed/aZSDtXqVoQ3P0+dAc22d/DAOCRYAcuQvRAIgQDqAHB6Sn/3M+WybAGcd7jttEkgctd4GlP4a1tIn21c=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./readabilitySAX.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./node_examples/mkread.js"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.1rc5","description":"the readability script ported to a sax parser","directories":{},"_nodeVersion":"v0.4.10","dependencies":{"sax":">= 0.2.2"},"_defaultsLoaded":true,"_engineSupported":true},"0.2.1":{"name":"readabilitySAX","version":"0.2.1","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@0.2.1","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"15c59bd40577d9ad816ad56dcf938119cf91bd27","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-0.2.1.tgz","integrity":"sha512-03PZ0b/nO1qlsCX0hoSlw6Otlaq7RDIgPVk1NqyUON8LTJ8G9uVBWOghzTZCtX2g7aTqCqjLe7FZRwTcT940SA==","signatures":[{"sig":"MEYCIQC4IbgjhdawZZ/o6LD5R8oiXG2ba1YhF3UeORTq+/pPXgIhAOAkFwr8rqGvCzIZgctWkTu36FdAK2NyyGBfRSKXHeie","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./readabilitySAX.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./node_examples/mkread.js"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.1rc5","description":"the readability script ported to a sax parser","directories":{},"_nodeVersion":"v0.4.10","dependencies":{"sax":">= 0.2.2","request":">= 2.1.1","htmlparser2":">= 1.8.0"},"_defaultsLoaded":true,"_engineSupported":true},"0.2.2":{"name":"readabilitySAX","version":"0.2.2","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@0.2.2","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"1ccb350d6a2a9d816d1826344bcfbb6caecc2a27","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-0.2.2.tgz","integrity":"sha512-F0SPEW74VUSri1jGpHXtifRdlPbo1y1jAWEsUFl3k8QPyG+ah2AVCiXA5ShnwpqE4rfULwVXKCMHWGWuCnnp4A==","signatures":[{"sig":"MEUCIFtp5Y4bLNFu2J66rl4q2gM1ujAOFHeB5w2u+Ku8EsGcAiEAmCQl9OHx0jpPtsiYCAPmwZFS71vnV0VEfQgyJsoqfw0=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./readabilitySAX.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./node_examples/mkread.js"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.1rc5","description":"the readability script ported to a sax parser","directories":{},"_nodeVersion":"v0.4.10","dependencies":{"sax":">= 0.2.2","request":">= 2.1.1","htmlparser2":">= 1.0.0"},"_defaultsLoaded":true,"_engineSupported":true},"0.3.0":{"name":"readabilitySAX","version":"0.3.0","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@0.3.0","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"fa5c874fb1f71056d42526c9dc9a1e5c32c5ba0e","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-0.3.0.tgz","integrity":"sha512-u+uqiwi2T5eIgeU9Rglll88jKm8dsp9dSZV/3+7QmmJsSwUBZWHl/bhnSIt1QtKJAU1JwNOGRqYBl6ayAbKmUQ==","signatures":[{"sig":"MEUCIQC4EiJbgZWzU5H+z9gb40v1npef9146kD0l0eueGCMh4wIgavEOqPUUuiqTgyFiZwZ2DHWNDY1QbF/Ij4/WH0r029A=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./readabilitySAX.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./node_examples/mkread.js"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.1rc5","description":"the readability script ported to a sax parser","directories":{},"_nodeVersion":"v0.4.10","dependencies":{"sax":">= 0.2.2","request":">= 2.1.1","htmlparser2":">= 1.0.0"},"_defaultsLoaded":true,"_engineSupported":true},"0.3.1":{"name":"readabilitySAX","version":"0.3.1","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@0.3.1","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"88dd57d744fb01800392b0f38bb8707bc4aea480","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-0.3.1.tgz","integrity":"sha512-Q9U/yEIpnaVnWVwsivCoFK8w4yyP2pmaGY0FLD2bQFN6u2SFvHOFHOhH2MWtDGWkNU61FHBgzovzHWLw0apFZA==","signatures":[{"sig":"MEUCIQCnb324GOZ2w0wdvzF/Yu3YsX6A0gxTRNONCf3lksRDPAIgemOOYj81aBnSu9ZztZL3W4hIVVtDQSrt7jje3m5GqLM=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./readabilitySAX.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./node_examples/mkread.js"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.1rc5","description":"the readability script ported to a sax parser","directories":{},"_nodeVersion":"v0.4.10","dependencies":{"request":">= 2.1.1","htmlparser2":">= 1.0.0"},"_defaultsLoaded":true,"_engineSupported":true},"0.3.2":{"name":"readabilitySAX","version":"0.3.2","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@0.3.2","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"3e362647e26758d68707d1f4f8bc310b24e2e8df","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-0.3.2.tgz","integrity":"sha512-oHKJS8Sqkd4jeDkwhfCIYRoMEw+S1S1m8PSk5B1GYGFO4c5p5MyVcqRYlqIF6aJiJp2OYsHmtnxFB4dwVSeI5w==","signatures":[{"sig":"MEUCIDJUwvJWRM7SZQ8XXGFDF6NMsKP6daIPl2cbVDFqsxj0AiEArhnBlRyWVXQTkOK/3Vx/yD9tqQgSTTjRrfGoRb+0YUo=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./readabilitySAX.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./node_examples/mkread.js"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.1rc5","description":"the readability script ported to a sax parser","directories":{},"_nodeVersion":"v0.4.10","dependencies":{"request":">= 2.1.1","htmlparser2":">= 1.0.0"},"_defaultsLoaded":true,"_engineSupported":true},"0.3.3":{"name":"readabilitySAX","version":"0.3.3","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@0.3.3","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"b6aad022504f426ade57d18aaddfefeba6b8663d","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-0.3.3.tgz","integrity":"sha512-BMxRPkst6Ne0d8aNjf2JibF7O0VYx8S7qQAETQ+pihCKFku+1+WJ058DtBIkVtyInESuSc4E2tfDzdY/n5Dd6Q==","signatures":[{"sig":"MEYCIQCtGY/ffMxSnJaSjwAeJyXTX4MsW8d6bnOq81mYfAFsxgIhANv5xaBo8/WVJLb18oDpcIs5K6liIx714WhOLZTy0Pze","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./readabilitySAX.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./node_examples/mkread.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.103","description":"the readability script ported to a sax parser","directories":{},"_nodeVersion":"v0.4.10","dependencies":{"request":">= 2.1.1","htmlparser2":">= 1.0.0"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true},"0.5.0":{"name":"readabilitySAX","version":"0.5.0","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@0.5.0","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"506942bb487c05bc4e2046bc6a31d85c71d8efbc","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-0.5.0.tgz","integrity":"sha512-BNT2wZJ1wJKBNB0m8avDH5b2BwEB+wr0MxEfrO39ZRRVKXad4k+LNt5IoOT7BW2VF0CK5dOsk1Wq1R/psSVgqA==","signatures":[{"sig":"MEYCIQDfjngMHHdBxU7q8lcJIi5JfW/ZPeGIT9Ubi6ppTQu8eQIhALa4yky3PGwt+E4oiLzyAY6NTzCntPysC0TJslAlZZ8V","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.2","dependencies":{"request":">= 2.1.1","htmlparser2":">= 1.9.0"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"0.6.0":{"name":"readabilitySAX","version":"0.6.0","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@0.6.0","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"754200f10a0bedee3fc7227574d4138230830cd3","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-0.6.0.tgz","integrity":"sha512-IAxOiMdSkBqvvzIK3zv8xfaRY3RY9kHdL5uqFdlBnxd0/fS3NWF0bDbOtpeTqfdxsmB7s9m/uXAxnpjMRVeukw==","signatures":[{"sig":"MEUCIQD0lwSpQBTacWG8wFx8JuvlrqEuhqYriRN05fhyfKDrpwIgMNu5XIOerjBbSxySQLcpBkwsKMIPjDxgomAbPK6vN/w=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.2","dependencies":{"request":">= 2.1.1","htmlparser2":">= 1.9.0"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"0.6.1":{"name":"readabilitySAX","version":"0.6.1","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@0.6.1","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"4214c19bc07634c46e87d92ce1b5e8dc6b1ecdf8","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-0.6.1.tgz","integrity":"sha512-d6FQDhqYBeXUXlx81swz6e9+iAjlN/7pze6Gh4xofMUZIfttgeLVi8p7QdGHEzkdvbz2cVHybZoF1jhHeHrE3A==","signatures":[{"sig":"MEYCIQC/8a0bjRVsFeRnu4PSEzcjrI+bEXYIB7mXRmLBNLPl3wIhAJkWGf+Y500d1JLW8wMZ47R1fh2hJoPAM/3l3VpPfjWv","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.2","dependencies":{"request":">= 2.1.1","htmlparser2":">= 1.9.0"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.0.0":{"name":"readabilitySAX","version":"1.0.0","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.0.0","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"83457552a7964b6197c21c953d78088b2066bee3","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.0.0.tgz","integrity":"sha512-heufi5eVaVJQ/KGX6GTX8n/pD2M9t8adtbSPTtqwm2tiJPiRD8tiWkzFsoBw9yopxwsz7aP+qlAFNwXqkg8BAA==","signatures":[{"sig":"MEQCIE1FbmfaMYh8jmPN1PPylG4CNvctNkNIHXSqWtl56ssgAiAoiXkgG2qloIzo+xI6B79NhM8/VwJTI2PJlWammOToSA==","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"*","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.4","dependencies":{"htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.0.1":{"name":"readabilitySAX","version":"1.0.1","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.0.1","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"e32b2a6dce6e1a28cc47a8c8b6ce3a27d291592b","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.0.1.tgz","integrity":"sha512-hfwIJPGhw9px0lFgJPVgHFtzEactry0+1eamiZOACA+VqgPGBoV6xRgJYedXLdkh3/MHP+XICAwspqaOBWXZHw==","signatures":[{"sig":"MEUCIEpL4bU4dmXRq6mUjaQfuc28ZQ9FBjZF64V0mbbFxB+RAiEAzQo+6ikx8dzI3oWE8eX23JbrZnAlbCj5sdCAsv8EcBE=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.6.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.6","dependencies":{"htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.0.2":{"name":"readabilitySAX","version":"1.0.2","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.0.2","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"a3bb74f03bf2ed04ec72b9bd5643892d46f06eff","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.0.2.tgz","integrity":"sha512-FnkHmuiyBnjk7eVkzQS9jnM/2+/wLUPYbTwK0/VMk0rAf44KP01v9f/CS8lnq/RZu16t5W6RSzDLJZiOIjVacQ==","signatures":[{"sig":"MEYCIQC8cGIsCj2rq8kGxie4p9c6Wflx91mIFUGyT6LxLy9ncQIhAPn6zWAtgxWdoYMqJqn4zaeAW1u5SrjAc6j5PE3E6pG/","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.6","dependencies":{"htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.0.3":{"name":"readabilitySAX","version":"1.0.3","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.0.3","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"46d667f9c2767cebd5082fb4fff881911b4ef5e1","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.0.3.tgz","integrity":"sha512-3ThYqiG2fvHrqnvn8/wxqgGiys+h5Yr40u/bWcPlPuub9N9lvtK+3ADmzVH2WExa4rAIXQLrTac0cLEUgnQbZw==","signatures":[{"sig":"MEUCIB9W01H5AghoQVKrOdLTDMgZvyW6YgErKvj20EWFC3wbAiEAt3lexUVYXaoVoSofLDxmR0e5m75O5fCiDHDoiDgaLnE=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.6","dependencies":{"htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.0.4":{"name":"readabilitySAX","version":"1.0.4","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.0.4","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"d93606f6cf3dfdf04d14933c186bfabc54bafe7d","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.0.4.tgz","integrity":"sha512-qSiMds+S/WSBP3jSc0IRUQf7BFPk/3HuBj74nWUNYGs1mUorADbv1y7KJPKbfc6IsBD1tI5wHseB9Sotr7gVow==","signatures":[{"sig":"MEUCIQDnp55CAR+TPKEGMKKoHnEV47drGaYB1HBTVZ1mgpM/wQIgWd+JLEBo16h3ua/ZYihCt/Kw14+ZhhM+WbXWX284h0M=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.6","dependencies":{"htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.0.5":{"name":"readabilitySAX","version":"1.0.5","author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.0.5","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"c35156439f15316306c001f88691ea22d2beaede","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.0.5.tgz","integrity":"sha512-OVNMWX32hbty8ZwtWWylwzIxlBW1+Jj0iH+lgGl2PzGlOFg/s58Xtp12q6NZfO29K4/ArgvHxCMADF7P6XY+2w==","signatures":[{"sig":"MEYCIQDn5rm5zqhtMiGrbL/6kw1IQYVl6NZTKEBQCgJbsSJ/9AIhAMn1OmSMv/pQAG6simwka0SXvR/0vIe3HANzfkDR815f","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.6","dependencies":{"htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.1.0":{"name":"readabilitySAX","version":"1.1.0","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.1.0","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"dist":{"shasum":"657d4ed73fdadb02519783673dbc713ecdf1bb76","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.1.0.tgz","integrity":"sha512-C+3lBW9GvP98aWNN8XrOwcj1Nu5JeCyl2W5Hac1OqinGDArvsYjNkldB75T6NJ5E2sz8YnMYiSmAAyyAyqnU7w==","signatures":[{"sig":"MEYCIQDZkhuoAiMjwudDmY2F0HWXbYc0byejaKg+7JSAhqGWRwIhAOyhavYzgRHsxtrpC95zBHZwXpbV+4fQSErv/XVJMuqX","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.6","dependencies":{"htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.2.0":{"name":"readabilitySAX","version":"1.2.0","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.2.0","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"e3e3a83def7a4827f6c190d5b06dea738cfdf8ac","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.2.0.tgz","integrity":"sha512-ENQCNNmO2FGhnvR5tYXF7lPAWs/gfUw8NUbIyfWG1WR/HhNINpgxBvROSYF9DFUR7YmIs9KupxQVp11udPgktg==","signatures":[{"sig":"MEUCIAoWXz5egmbM0RuhBnhBMTd30bu/J911Ey96yNp/P21mAiEAxVNEFqAt088x7LrFgX4RuZAlWTXhr8Lx9QkDOernTSw=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.6","dependencies":{"minreq":"0.x.x","htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.2.1":{"name":"readabilitySAX","version":"1.2.1","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.2.1","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"d0420888423686d913bb0ac52a1fe2f97ca3f266","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.2.1.tgz","integrity":"sha512-zdWGMGYQxSj7jH1Ud10i1Ioaa+d5IBZ9sqIzEerzDxQMrAA5IcxbEs4O2TDKQfEtQCD7c/VsGsjcYDGAlAxhQQ==","signatures":[{"sig":"MEYCIQCMqyh0B/UpRvAPsLOqrF10MsHVogfFo3v4PvhVunq53QIhAKFAPE6hnOX4niBacURo9FL3EJXHUyNsIigU8z9tnvJG","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.6","dependencies":{"minreq":"0.x.x","htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.2.2":{"name":"readabilitySAX","version":"1.2.2","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.2.2","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"2bb3ad37c9d001c24508ff9a046670a8557bed19","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.2.2.tgz","integrity":"sha512-85Mvn/nH2QY+nWZTd1invlHovu1ESOcKozBdz9mTxpZMxzYFYgDxVUob3cmU6wYKxhxypJvUiuz4HC1+iCm+9A==","signatures":[{"sig":"MEUCIGt79NFtfTnBmUXoab8fiOvfM1uwev+Q4SnBUlAGUoLIAiEA65Xj2YQxs07iHx0PFvvmXFCxxFkNaRgiZGDSLSPI2dE=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.6","dependencies":{"minreq":"0.x.x","htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.3.0":{"name":"readabilitySAX","version":"1.3.0","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.3.0","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"ede34fa94342b49b81ebb5cd02f7ac0e6140c5bc","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.3.0.tgz","integrity":"sha512-jOA9lLZ3UMtVpw0IIZ47Z2EghJGT/iRfR5mSPc3trB3YOIkgotrw9GMrmgFjC8RGohOw4/2zJNdJNdkxxaLa/Q==","signatures":[{"sig":"MEQCIE0ASBhfDuF1YQS8vdjFrCfNdtGQnFRjXT5BDhyeclq8AiALVMRZ8awvGabZA1yRBdAv2Ac2l7Dsel3hN4xd7+BAWw==","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.7","dependencies":{"minreq":"0.x.x","htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.3.1":{"name":"readabilitySAX","version":"1.3.1","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.3.1","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"efd95fbe568563c60dedfc0320ff88d204f09231","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.3.1.tgz","integrity":"sha512-gRAMlcegDOiXzOohDdhsrrnWTctfavTkZMuCiLY1XIxDK8i2gYxWmZBnEjbZtRfF9THI9zD9mSGdI6vExRnvww==","signatures":[{"sig":"MEQCICQHgXzxSno1w3NfbqWocv2K8lIQMVw1FLPax3srSJYrAiBoNRQYLdK3E1EI04sHdWwEQWiA9CgGDmWuJ7QJfega+g==","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.7","dependencies":{"minreq":"0.x.x","htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.3.2":{"name":"readabilitySAX","version":"1.3.2","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.3.2","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"c4a0f77e3b11a5846ff9338d8770435d49a0881b","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.3.2.tgz","integrity":"sha512-7uF+1TXzoCGzsE8Uvukx/ANuOrIISJViPbfIVJ2OfAUzsea9LA8f3t0aB1WH/mh/NNzXd6W3SBkQIhnYlGmH8Q==","signatures":[{"sig":"MEUCIQCB3JNBYS3mQiMfbmYgqvo6sivYGgl0fVmE5g/9xhD04wIgK/NH3Q39yXVMkSliS5exfihhpjD8/QVPgug8kg8ku+k=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.7","dependencies":{"minreq":"0.x.x","htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.3.3":{"name":"readabilitySAX","version":"1.3.3","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.3.3","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"d3883816cc038d561aa809bf3e2f79865605561b","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.3.3.tgz","integrity":"sha512-IZdC0PJiGGwgYv6Wb39PALQ34vZ3vttr8O8fv1984BopGYY3zB8+Qo40Lh4ETgaceHnixeLcAR92m0+dsFsD9Q==","signatures":[{"sig":"MEUCIQCKdBtNCL9RwwHJgkxq0hasb98ZtRZ4oOzl8L9BWDKwcwIgHQZxdRFSWYInF/2siYy1eIz6ST1aI5YtkFQK4njmxaQ=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.0.106","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.7","dependencies":{"minreq":"0.x.x","htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"directories":{}},"1.4.0":{"name":"readabilitySAX","version":"1.4.0","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.4.0","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"e505cb558d2d9ea11d320e616e2dd37d05459284","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.4.0.tgz","integrity":"sha512-a/R5QAFl3w44eqI3W4q6Reyv9bwhLyMRAAkckT+gPIOXDZ83hKXXZwp4RMibACYKdOAGwS2chnnV9hYTaRL0ew==","signatures":[{"sig":"MEQCICnVfObOvK5ZGLBioWCDhHYEaxyhGz8bd60xjsOOqbmtAiBd2onKdPm7811ehF7CHE+Rq/aVr0peD//5UsDJhP1A+w==","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","readme":"#readabilitySAX\na fast and platform independent readability port\n\n##About\nOne day, I wanted to use [Readability](http://code.google.com/p/arc90labs-readability/), an algorithm to extract relevant pieces of information out of websites, for a node.js project. There are some ports of Readability to node (using jsdom, e.g. [that one](https://github.com/arrix/node-readability)), but they are pretty slow. I don't want to wait for more than a second (literally) until my node instance is ready to continue. So I started this project, porting the code to a SAX parser.\n\nIn my tests, most pages, even large ones, were finished within 15ms (on node, see below for more information). It works with Rhino, so it runs on [YQL](http://developer.yahoo.com/yql \"Yahoo! Query Language\"), which may have interesting uses. And it works within a browser.\n\nThe Readability extraction algorithm was completely ported, but some adjustments were made:\n\n* `<article>` and `<section>` tags are recognized and gain a higher value\n\n* If a heading is part of the pages `<title>`, it is removed (Readability removed any single `<h2>`, and ignored other tags)\n\n* `henry` and `instapaper-body` are classes to show an algorithm like this where the content is. readabilitySAX recognizes them and adds additional points\n\n* Every bit of code that was taken from the original algorithm was optimized, eg. RegExps should now perform faster (they were optimized & use `RegExp#test` instead of `String#match`, which doesn't force the interpreter to build an array)\n\n* Some improvements made by [GGReadability](https://github.com/curthard89/COCOA-Stuff/tree/master/GGReadability) (an Obj-C port of Readability) were adopted\n    * Images get additional scores when their `height` or `width` attributes are high - icon sized images (<= 32px) get skipped\n    * Additional classes & ids are checked\n\n##HowTo\n###Installing readabilitySAX (node)\nThis module is available on `npm` as `readabilitySAX`. Just run \n\n    npm install readabilitySAX\n\n#####CLI\nA command line interface (CLI) may be installed via\n\n    npm install -g readabilitySAX\n\nIt's then available via\n\n    readability <domain> [<format>]\n\nTo get this readme, just run\n\n    readability https://github.com/FB55/readabilitySAX\n\nThe format is optional (it's either `text` or `html`, the default value is `text`).\n\n###Usage\n#####Node\nJust run `require(\"readabilitySAX\")`. You'll get an object containing three methods:\n\n* `get(link, callback)`: Gets a webpage and process it.\n\n* `process(data)`: Takes a string, runs readabilitySAX and returns the page.\n\n* `Readability(settings)`: The readability object. It works as a handler for `htmlparser2`. Read more about it [in the wiki](https://github.com/FB55/readabilitySAX/wiki/The-Readability-constructor)!\n\n#####Browsers\n\nI started to implement simplified SAX-\"parsers\" for Rhino/YQL (using E4X) and the browser (using the DOM) to increase the overall performance on those platforms. The DOM version is inside the `/browsers` dir.\n\nA demo of how to use readabilitySAX inside a browser may be found at [jsFiddle](http://jsfiddle.net/pXqYR/embedded/). Some basic example files are inside the `/browsers` directory.\n\n#####YQL\n\nA table using E4X-based events is available as the community table `redabilitySAX`, as well as [here](https://github.com/FB55/yql-tables/tree/master/readabilitySAX).\n\n##Parsers (on node)\nMost SAX parsers (as sax.js) fail when a document is malformed XML, even if it's correct HTML. readabilitySAX should be used with [htmlparser2](https://github.com/FB55/node-htmlparser), my fork of the `htmlparser`-module (used by eg. `jsdom`), which corrects most faults. It's listed as a dependency, so npm should install it with readabilitySAX.\n\n##Performance\n\n#####Speed\nUsing a package of 724 pages from [CleanEval](http://cleaneval.sigwac.org.uk) (their website seems to be down, try to google it), readabilitySAX processed all of them in 5768 ms, that's an average of 7.97 ms per page.\n\nThe benchmark was done using `tests/benchmark.js` on a MacBook (late 2010) and is probably far from perfect.\n\nPerformance is the main goal of this project. The current speed should be good enough to run readabilitySAX on a singe-threaded web server with an average number of requests. That's an accomplishment!\n\n#####Accuracy\nThe main goal of CleanEval is to evaluate the accuracy of an algorithm. \n\n___// TODO___\n\n##Todo\n\n- Add documentation & examples\n- Improve the performance (always)","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.1.0-3","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.10","dependencies":{"minreq":"0.x.x","entities":"0.x.x","htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"optionalDependencies":{},"directories":{}},"1.4.1":{"name":"readabilitySAX","version":"1.4.1","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.4.1","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"4cd72d42d6b285d65efb8bb81d7905f9311c6e2e","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.4.1.tgz","integrity":"sha512-cV4nI23u5IGD5I/g6ughLnx2vtgBfnvzBENIyK7sKMGqXq7XTd1ccdXbfaO1vlonlWmYoQymN315WyCOZmnf9Q==","signatures":[{"sig":"MEUCIHBSgkM6cFMXDIwiQlxAiezQ4e3tyzmKuMcbzKpfpGOvAiEAtzMGjGMN5tSFDZp+tjt0q/RMPCZIPJTeRFzKAiww7yE=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","readme":"#readabilitySAX\na fast and platform independent readability port\n\n##About\nOne day, I wanted to use [Readability](http://code.google.com/p/arc90labs-readability/), an algorithm to extract relevant pieces of information out of websites, for a node.js project. There are some ports of Readability to node (using jsdom, e.g. [that one](https://github.com/arrix/node-readability)), but they are pretty slow. I don't want to wait for more than a second (literally) until my node instance is ready to continue. So I started this project, porting the code to a SAX parser.\n\nIn my tests, most pages, even large ones, were finished within 15ms (on node, see below for more information). It works with Rhino, so it runs on [YQL](http://developer.yahoo.com/yql \"Yahoo! Query Language\"), which may have interesting uses. And it works within a browser.\n\nThe Readability extraction algorithm was completely ported, but some adjustments were made:\n\n* `<article>` and `<section>` tags are recognized and gain a higher value\n\n* If a heading is part of the pages `<title>`, it is removed (Readability removed any single `<h2>`, and ignored other tags)\n\n* `henry` and `instapaper-body` are classes to show an algorithm like this where the content is. readabilitySAX recognizes them and adds additional points\n\n* Every bit of code that was taken from the original algorithm was optimized, eg. RegExps should now perform faster (they were optimized & use `RegExp#test` instead of `String#match`, which doesn't force the interpreter to build an array)\n\n* Some improvements made by [GGReadability](https://github.com/curthard89/COCOA-Stuff/tree/master/GGReadability) (an Obj-C port of Readability) were adopted\n    * Images get additional scores when their `height` or `width` attributes are high - icon sized images (<= 32px) get skipped\n    * Additional classes & ids are checked\n\n##HowTo\n###Installing readabilitySAX (node)\nThis module is available on `npm` as `readabilitySAX`. Just run \n\n    npm install readabilitySAX\n\n#####CLI\nA command line interface (CLI) may be installed via\n\n    npm install -g readabilitySAX\n\nIt's then available via\n\n    readability <domain> [<format>]\n\nTo get this readme, just run\n\n    readability https://github.com/FB55/readabilitySAX\n\nThe format is optional (it's either `text` or `html`, the default value is `text`).\n\n###Usage\n#####Node\nJust run `require(\"readabilitySAX\")`. You'll get an object containing three methods:\n\n* `get(link, callback)`: Gets a webpage and process it.\n\n* `process(data)`: Takes a string, runs readabilitySAX and returns the page.\n\n* `Readability(settings)`: The readability object. It works as a handler for `htmlparser2`. Read more about it [in the wiki](https://github.com/FB55/readabilitySAX/wiki/The-Readability-constructor)!\n\n#####Browsers\n\nI started to implement simplified SAX-\"parsers\" for Rhino/YQL (using E4X) and the browser (using the DOM) to increase the overall performance on those platforms. The DOM version is inside the `/browsers` dir.\n\nA demo of how to use readabilitySAX inside a browser may be found at [jsFiddle](http://jsfiddle.net/pXqYR/embedded/). Some basic example files are inside the `/browsers` directory.\n\n#####YQL\n\nA table using E4X-based events is available as the community table `redabilitySAX`, as well as [here](https://github.com/FB55/yql-tables/tree/master/readabilitySAX).\n\n##Parsers (on node)\nMost SAX parsers (as sax.js) fail when a document is malformed XML, even if it's correct HTML. readabilitySAX should be used with [htmlparser2](https://github.com/FB55/node-htmlparser), my fork of the `htmlparser`-module (used by eg. `jsdom`), which corrects most faults. It's listed as a dependency, so npm should install it with readabilitySAX.\n\n##Performance\n\n#####Speed\nUsing a package of 724 pages from [CleanEval](http://cleaneval.sigwac.org.uk) (their website seems to be down, try to google it), readabilitySAX processed all of them in 5768 ms, that's an average of 7.97 ms per page.\n\nThe benchmark was done using `tests/benchmark.js` on a MacBook (late 2010) and is probably far from perfect.\n\nPerformance is the main goal of this project. The current speed should be good enough to run readabilitySAX on a singe-threaded web server with an average number of requests. That's an accomplishment!\n\n#####Accuracy\nThe main goal of CleanEval is to evaluate the accuracy of an algorithm. \n\n___// TODO___\n\n##Todo\n\n- Add documentation & examples\n- Improve the performance (always)","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.1.1","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.10","dependencies":{"minreq":"0.x.x","entities":"0.x.x","htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"optionalDependencies":{},"directories":{}},"1.4.2":{"name":"readabilitySAX","version":"1.4.2","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.4.2","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"b4e97ec91fd278b2cdfedef9a35681f07ce84133","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.4.2.tgz","integrity":"sha512-SPFRRKfnIPizdJO8yOtA5ee5ezcdF0sBDekgsvGaIfpZ9SoILmInXExbRT7sCCQls3Vq/RQgdrMtlQAsXMDMxg==","signatures":[{"sig":"MEUCIBAupVYDxyYxoo0ddKRhaFFp/hsVLI96B2HuJGVWvKo/AiEAkxYs2t+wxnKbua6xFdCn4Mg0enU2fYfXK4mK0SIuJIc=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","readme":"#readabilitySAX\na fast and platform independent readability port\n\n##About\nOne day, I wanted to use [Readability](http://code.google.com/p/arc90labs-readability/), an algorithm to extract relevant pieces of information out of websites, for a node.js project. There are some ports of Readability to node (using jsdom, e.g. [that one](https://github.com/arrix/node-readability)), but they are pretty slow. I don't want to wait for more than a second (literally) until my node instance is ready to continue. So I started this project, porting the code to a SAX parser.\n\nIn my tests, most pages, even large ones, were finished within 15ms (on node, see below for more information). It works with Rhino, so it runs on [YQL](http://developer.yahoo.com/yql \"Yahoo! Query Language\"), which may have interesting uses. And it works within a browser.\n\nThe Readability extraction algorithm was completely ported, but some adjustments were made:\n\n* `<article>` and `<section>` tags are recognized and gain a higher value\n\n* If a heading is part of the pages `<title>`, it is removed (Readability removed any single `<h2>`, and ignored other tags)\n\n* `henry` and `instapaper-body` are classes to show an algorithm like this where the content is. readabilitySAX recognizes them and adds additional points\n\n* Every bit of code that was taken from the original algorithm was optimized, eg. RegExps should now perform faster (they were optimized & use `RegExp#test` instead of `String#match`, which doesn't force the interpreter to build an array)\n\n* Some improvements made by [GGReadability](https://github.com/curthard89/COCOA-Stuff/tree/master/GGReadability) (an Obj-C port of Readability) were adopted\n    * Images get additional scores when their `height` or `width` attributes are high - icon sized images (<= 32px) get skipped\n    * Additional classes & ids are checked\n\n##HowTo\n###Installing readabilitySAX (node)\nThis module is available on `npm` as `readabilitySAX`. Just run \n\n    npm install readabilitySAX\n\n#####CLI\nA command line interface (CLI) may be installed via\n\n    npm install -g readabilitySAX\n\nIt's then available via\n\n    readability <domain> [<format>]\n\nTo get this readme, just run\n\n    readability https://github.com/FB55/readabilitySAX\n\nThe format is optional (it's either `text` or `html`, the default value is `text`).\n\n###Usage\n#####Node\nJust run `require(\"readabilitySAX\")`. You'll get an object containing three methods:\n\n* `get(link, callback)`: Gets a webpage and process it.\n\n* `process(data)`: Takes a string, runs readabilitySAX and returns the page.\n\n* `Readability(settings)`: The readability object. It works as a handler for `htmlparser2`. Read more about it [in the wiki](https://github.com/FB55/readabilitySAX/wiki/The-Readability-constructor)!\n\n#####Browsers\n\nI started to implement simplified SAX-\"parsers\" for Rhino/YQL (using E4X) and the browser (using the DOM) to increase the overall performance on those platforms. The DOM version is inside the `/browsers` dir.\n\nA demo of how to use readabilitySAX inside a browser may be found at [jsFiddle](http://jsfiddle.net/pXqYR/embedded/). Some basic example files are inside the `/browsers` directory.\n\n#####YQL\n\nA table using E4X-based events is available as the community table `redabilitySAX`, as well as [here](https://github.com/FB55/yql-tables/tree/master/readabilitySAX).\n\n##Parsers (on node)\nMost SAX parsers (as sax.js) fail when a document is malformed XML, even if it's correct HTML. readabilitySAX should be used with [htmlparser2](https://github.com/FB55/node-htmlparser), my fork of the `htmlparser`-module (used by eg. `jsdom`), which corrects most faults. It's listed as a dependency, so npm should install it with readabilitySAX.\n\n##Performance\n\n#####Speed\nUsing a package of 724 pages from [CleanEval](http://cleaneval.sigwac.org.uk) (their website seems to be down, try to google it), readabilitySAX processed all of them in 5768 ms, that's an average of 7.97 ms per page.\n\nThe benchmark was done using `tests/benchmark.js` on a MacBook (late 2010) and is probably far from perfect.\n\nPerformance is the main goal of this project. The current speed should be good enough to run readabilitySAX on a singe-threaded web server with an average number of requests. That's an accomplishment!\n\n#####Accuracy\nThe main goal of CleanEval is to evaluate the accuracy of an algorithm. \n\n___// TODO___\n\n##Todo\n\n- Add documentation & examples\n- Improve the performance (always)","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.1.3","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.10","dependencies":{"minreq":"0.x.x","entities":"0.x.x","htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"optionalDependencies":{},"directories":{}},"1.4.3":{"name":"readabilitySAX","version":"1.4.3","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.4.3","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"2821202b1c7bc09c46279f690d94523b644f1ce1","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.4.3.tgz","integrity":"sha512-bvMOmpKn4NMC+adI0tTbc7w6wAmntlCCdoiPHOwm/DmAEi0WG6zTyaNfNWlGqCGrg7oVhyJ0yB8jVdck/hlVAA==","signatures":[{"sig":"MEQCICZchgx7/0Zf31r6rcDS63uQrcDoG4cyi0G53wgWp8nAAiBMpo2j+0cV4R4N36ePNk4zCNbOihBjtDXvuw4TT5ilCQ==","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","readme":"#readabilitySAX\na fast and platform independent readability port\n\n##About\nOne day, I wanted to use [Readability](http://code.google.com/p/arc90labs-readability/), an algorithm to extract relevant pieces of information out of websites, for a node.js project. There are some ports of Readability to node (using jsdom, e.g. [that one](https://github.com/arrix/node-readability)), but they are pretty slow. I don't want to wait for more than a second (literally) until my node instance is ready to continue. So I started this project, porting the code to a SAX parser.\n\nIn my tests, most pages, even large ones, were finished within 15ms (on node, see below for more information). It works with Rhino, so it runs on [YQL](http://developer.yahoo.com/yql \"Yahoo! Query Language\"), which may have interesting uses. And it works within a browser.\n\nThe Readability extraction algorithm was completely ported, but some adjustments were made:\n\n* `<article>` and `<section>` tags are recognized and gain a higher value\n\n* If a heading is part of the pages `<title>`, it is removed (Readability removed any single `<h2>`, and ignored other tags)\n\n* `henry` and `instapaper-body` are classes to show an algorithm like this where the content is. readabilitySAX recognizes them and adds additional points\n\n* Every bit of code that was taken from the original algorithm was optimized, eg. RegExps should now perform faster (they were optimized & use `RegExp#test` instead of `String#match`, which doesn't force the interpreter to build an array)\n\n* Some improvements made by [GGReadability](https://github.com/curthard89/COCOA-Stuff/tree/master/GGReadability) (an Obj-C port of Readability) were adopted\n    * Images get additional scores when their `height` or `width` attributes are high - icon sized images (<= 32px) get skipped\n    * Additional classes & ids are checked\n\n##HowTo\n###Installing readabilitySAX (node)\nThis module is available on `npm` as `readabilitySAX`. Just run \n\n    npm install readabilitySAX\n\n#####CLI\nA command line interface (CLI) may be installed via\n\n    npm install -g readabilitySAX\n\nIt's then available via\n\n    readability <domain> [<format>]\n\nTo get this readme, just run\n\n    readability https://github.com/FB55/readabilitySAX\n\nThe format is optional (it's either `text` or `html`, the default value is `text`).\n\n###Usage\n#####Node\nJust run `require(\"readabilitySAX\")`. You'll get an object containing three methods:\n\n* `get(link, callback)`: Gets a webpage and process it.\n\n* `process(data)`: Takes a string, runs readabilitySAX and returns the page.\n\n* `Readability(settings)`: The readability object. It works as a handler for `htmlparser2`. Read more about it [in the wiki](https://github.com/FB55/readabilitySAX/wiki/The-Readability-constructor)!\n\n#####Browsers\n\nI started to implement simplified SAX-\"parsers\" for Rhino/YQL (using E4X) and the browser (using the DOM) to increase the overall performance on those platforms. The DOM version is inside the `/browsers` dir.\n\nA demo of how to use readabilitySAX inside a browser may be found at [jsFiddle](http://jsfiddle.net/pXqYR/embedded/). Some basic example files are inside the `/browsers` directory.\n\n#####YQL\n\nA table using E4X-based events is available as the community table `redabilitySAX`, as well as [here](https://github.com/FB55/yql-tables/tree/master/readabilitySAX).\n\n##Parsers (on node)\nMost SAX parsers (as sax.js) fail when a document is malformed XML, even if it's correct HTML. readabilitySAX should be used with [htmlparser2](https://github.com/FB55/node-htmlparser), my fork of the `htmlparser`-module (used by eg. `jsdom`), which corrects most faults. It's listed as a dependency, so npm should install it with readabilitySAX.\n\n##Performance\n\n#####Speed\nUsing a package of 724 pages from [CleanEval](http://cleaneval.sigwac.org.uk) (their website seems to be down, try to google it), readabilitySAX processed all of them in 5768 ms, that's an average of 7.97 ms per page.\n\nThe benchmark was done using `tests/benchmark.js` on a MacBook (late 2010) and is probably far from perfect.\n\nPerformance is the main goal of this project. The current speed should be good enough to run readabilitySAX on a singe-threaded web server with an average number of requests. That's an accomplishment!\n\n#####Accuracy\nThe main goal of CleanEval is to evaluate the accuracy of an algorithm. \n\n___// TODO___\n\n##Todo\n\n- Add documentation & examples\n- Improve the performance (always)","engines":{"node":"*"},"scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.1.3","description":"the readability script ported to a sax parser","_nodeVersion":"v0.6.12","dependencies":{"minreq":"0.x.x","entities":"0.x.x","htmlparser2":"2.x.x"},"_defaultsLoaded":true,"devDependencies":{},"_engineSupported":true,"optionalDependencies":{},"directories":{}},"1.4.4":{"name":"readabilitySAX","version":"1.4.4","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.4.4","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"38e821173971fea3d0e8e7a7a2f1313aef38883a","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.4.4.tgz","integrity":"sha512-WeVy8djkMRuubfAiAF1e4b0hM4ojZerl7VYxFtNhScQSf8Z52ShQlN3UlYt27htdtF+sX8a0g8SdQTN4u+YaNg==","signatures":[{"sig":"MEQCIED/OKt+wx8qe1kiBOOlnFxruwfZfde/MfhXF4ONIXG4AiBpGhPTo9/mSfNMvb6179+b5JQJhLOTqDqiVEwWaN3lpg==","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","readme":"#readabilitySAX\na fast and platform independent readability port\n\n##About\nOne day, I wanted to use [Readability](http://code.google.com/p/arc90labs-readability/), an algorithm to extract relevant pieces of information out of websites, for a node.js project. There are some ports of Readability to node (using jsdom, e.g. [that one](https://github.com/arrix/node-readability)), but they are pretty slow. I don't want to wait for more than a second (literally) until my node instance is ready to continue. So I started this project, porting the code to a SAX parser.\n\nIn my tests, most pages, even large ones, were finished within 15ms (on node, see below for more information). It works with Rhino, so it runs on [YQL](http://developer.yahoo.com/yql \"Yahoo! Query Language\"), which may have interesting uses. And it works within a browser.\n\nThe Readability extraction algorithm was completely ported, but some adjustments were made:\n\n* `<article>` and `<section>` tags are recognized and gain a higher value\n\n* If a heading is part of the pages `<title>`, it is removed (Readability removed any single `<h2>`, and ignored other tags)\n\n* `henry` and `instapaper-body` are classes to show an algorithm like this where the content is. readabilitySAX recognizes them and adds additional points\n\n* Every bit of code that was taken from the original algorithm was optimized, eg. RegExps should now perform faster (they were optimized & use `RegExp#test` instead of `String#match`, which doesn't force the interpreter to build an array)\n\n* Some improvements made by [GGReadability](https://github.com/curthard89/COCOA-Stuff/tree/master/GGReadability) (an Obj-C port of Readability) were adopted\n    * Images get additional scores when their `height` or `width` attributes are high - icon sized images (<= 32px) get skipped\n    * Additional classes & ids are checked\n\n##HowTo\n###Installing readabilitySAX (node)\nThis module is available on `npm` as `readabilitySAX`. Just run \n\n    npm install readabilitySAX\n\n#####CLI\nA command line interface (CLI) may be installed via\n\n    npm install -g readabilitySAX\n\nIt's then available via\n\n    readability <domain> [<format>]\n\nTo get this readme, just run\n\n    readability https://github.com/FB55/readabilitySAX\n\nThe format is optional (it's either `text` or `html`, the default value is `text`).\n\n###Usage\n#####Node\nJust run `require(\"readabilitySAX\")`. You'll get an object containing three methods:\n\n* `get(link, callback)`: Gets a webpage and process it.\n\n* `process(data)`: Takes a string, runs readabilitySAX and returns the page.\n\n* `Readability(settings)`: The readability object. It works as a handler for `htmlparser2`. Read more about it [in the wiki](https://github.com/FB55/readabilitySAX/wiki/The-Readability-constructor)!\n\n#####Browsers\n\nI started to implement simplified SAX-\"parsers\" for Rhino/YQL (using E4X) and the browser (using the DOM) to increase the overall performance on those platforms. The DOM version is inside the `/browsers` dir.\n\nA demo of how to use readabilitySAX inside a browser may be found at [jsFiddle](http://jsfiddle.net/pXqYR/embedded/). Some basic example files are inside the `/browsers` directory.\n\n#####YQL\n\nA table using E4X-based events is available as the community table `redabilitySAX`, as well as [here](https://github.com/FB55/yql-tables/tree/master/readabilitySAX).\n\n##Parsers (on node)\nMost SAX parsers (as sax.js) fail when a document is malformed XML, even if it's correct HTML. readabilitySAX should be used with [htmlparser2](https://github.com/FB55/node-htmlparser), my fork of the `htmlparser`-module (used by eg. `jsdom`), which corrects most faults. It's listed as a dependency, so npm should install it with readabilitySAX.\n\n##Performance\n\n#####Speed\nUsing a package of 724 pages from [CleanEval](http://cleaneval.sigwac.org.uk) (their website seems to be down, try to google it), readabilitySAX processed all of them in 5768 ms, that's an average of 7.97 ms per page.\n\nThe benchmark was done using `tests/benchmark.js` on a MacBook (late 2010) and is probably far from perfect.\n\nPerformance is the main goal of this project. The current speed should be good enough to run readabilitySAX on a singe-threaded web server with an average number of requests. That's an accomplishment!\n\n#####Accuracy\nThe main goal of CleanEval is to evaluate the accuracy of an algorithm.\n\n___// TODO___\n\n##Todo\n\n- Add documentation & examples\n- Add support for URLs containing hash-bangs (`#!`)\n- Allow fetching articles with more than one page\n- Don't remove all images inside `<a>` tags\n- Add a `user-agent` header with usable contents","scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.1.59","description":"the readability script ported to a sax parser","dependencies":{"minreq":"0.x.x","entities":"0.x.x","htmlparser2":"2.x.x"},"directories":{}},"1.4.5":{"name":"readabilitySAX","version":"1.4.5","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.4.5","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"3ef9b23dcf60e3ff25060db46214a752757fd5a1","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.4.5.tgz","integrity":"sha512-JlvF3qn3E/cTvkiNm7d156cTmonPSNheWHpT7Pma5YyoFWLwBWYTT6KHHGFiOp7s1Z+DmoLsdfFpfCOd7T1l4A==","signatures":[{"sig":"MEQCIDJqcnPycZVbz0//jLh7BVd14RQl1aLsdX03PMpCXM4YAiA6BI8QpCnsVFv+yl/YbXd25SftXHtVIHbpA/S2QSWgnQ==","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","readme":"#readabilitySAX\na fast and platform independent readability port\n\n##About\nOne day, I wanted to use [Readability](http://code.google.com/p/arc90labs-readability/), an algorithm to extract relevant pieces of information out of websites, for a node.js project. There are some ports of Readability to node (using jsdom, e.g. [that one](https://github.com/arrix/node-readability)), but they are pretty slow. I don't want to wait for more than a second (literally) until my node instance is ready to continue. So I started this project, porting the code to a SAX parser.\n\nIn my tests, most pages, even large ones, were finished within 15ms (on node, see below for more information). It works with Rhino, so it runs on [YQL](http://developer.yahoo.com/yql \"Yahoo! Query Language\"), which may have interesting uses. And it works within a browser.\n\nThe Readability extraction algorithm was completely ported, but some adjustments were made:\n\n* `<article>` and `<section>` tags are recognized and gain a higher value\n\n* If a heading is part of the pages `<title>`, it is removed (Readability removed any single `<h2>`, and ignored other tags)\n\n* `henry` and `instapaper-body` are classes to show an algorithm like this where the content is. readabilitySAX recognizes them and adds additional points\n\n* Every bit of code that was taken from the original algorithm was optimized, eg. RegExps should now perform faster (they were optimized & use `RegExp#test` instead of `String#match`, which doesn't force the interpreter to build an array)\n\n* Some improvements made by [GGReadability](https://github.com/curthard89/COCOA-Stuff/tree/master/GGReadability) (an Obj-C port of Readability) were adopted\n    * Images get additional scores when their `height` or `width` attributes are high - icon sized images (<= 32px) get skipped\n    * Additional classes & ids are checked\n\n##HowTo\n###Installing readabilitySAX (node)\nThis module is available on `npm` as `readabilitySAX`. Just run \n\n    npm install readabilitySAX\n\n#####CLI\nA command line interface (CLI) may be installed via\n\n    npm install -g readabilitySAX\n\nIt's then available via\n\n    readability <domain> [<format>]\n\nTo get this readme, just run\n\n    readability https://github.com/FB55/readabilitySAX\n\nThe format is optional (it's either `text` or `html`, the default value is `text`).\n\n###Usage\n#####Node\nJust run `require(\"readabilitySAX\")`. You'll get an object containing three methods:\n\n* `get(link, callback)`: Gets a webpage and process it.\n\n* `process(data)`: Takes a string, runs readabilitySAX and returns the page.\n\n* `Readability(settings)`: The readability object. It works as a handler for `htmlparser2`. Read more about it [in the wiki](https://github.com/FB55/readabilitySAX/wiki/The-Readability-constructor)!\n\n#####Browsers\n\nI started to implement simplified SAX-\"parsers\" for Rhino/YQL (using E4X) and the browser (using the DOM) to increase the overall performance on those platforms. The DOM version is inside the `/browsers` dir.\n\nA demo of how to use readabilitySAX inside a browser may be found at [jsFiddle](http://jsfiddle.net/pXqYR/embedded/). Some basic example files are inside the `/browsers` directory.\n\n#####YQL\n\nA table using E4X-based events is available as the community table `redabilitySAX`, as well as [here](https://github.com/FB55/yql-tables/tree/master/readabilitySAX).\n\n##Parsers (on node)\nMost SAX parsers (as sax.js) fail when a document is malformed XML, even if it's correct HTML. readabilitySAX should be used with [htmlparser2](https://github.com/FB55/node-htmlparser), my fork of the `htmlparser`-module (used by eg. `jsdom`), which corrects most faults. It's listed as a dependency, so npm should install it with readabilitySAX.\n\n##Performance\n\n#####Speed\nUsing a package of 724 pages from [CleanEval](http://cleaneval.sigwac.org.uk) (their website seems to be down, try to google it), readabilitySAX processed all of them in 5768 ms, that's an average of 7.97 ms per page.\n\nThe benchmark was done using `tests/benchmark.js` on a MacBook (late 2010) and is probably far from perfect.\n\nPerformance is the main goal of this project. The current speed should be good enough to run readabilitySAX on a singe-threaded web server with an average number of requests. That's an accomplishment!\n\n#####Accuracy\nThe main goal of CleanEval is to evaluate the accuracy of an algorithm.\n\n___// TODO___\n\n##Todo\n\n- Add documentation & examples\n- Add support for URLs containing hash-bangs (`#!`)\n- Allow fetching articles with more than one page\n- Don't remove all images inside `<a>` tags\n- Add a `user-agent` header with usable contents","scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.1.59","description":"the readability script ported to a sax parser","dependencies":{"minreq":"0.x.x","entities":"0.x.x","htmlparser2":"2.x.x"},"directories":{}},"1.5.1":{"name":"readabilitySAX","version":"1.5.1","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.5.1","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"a4627be56f85e39977d5ccadd3606063b1552f94","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.5.1.tgz","integrity":"sha512-LdpbEt64lOymqd1vle566tG8TeE5Xpo9j8S8c92Gbgk293Vz6dBwdFN6V83IYy0fDfOtyHw2bhl3D++9Ev0SBQ==","signatures":[{"sig":"MEUCIQCfE2fHNc7bCw7Mp3J3cbfWsECmFrYvhQ0vCzepOxjv7wIgL62BSp6dy2dKToMEchgCujECC5lUxFuGqKzuCBccid8=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","readme":"#readabilitySAX\na fast and platform independent readability port\n\n##About\nOne day, I wanted to use [Readability](http://code.google.com/p/arc90labs-readability/), an algorithm to extract relevant pieces of information out of websites, for a node.js project. There are some ports of Readability to node (using jsdom, e.g. [that one](https://github.com/arrix/node-readability)), but they are pretty slow. I don't want to wait for more than a second (literally) until my node instance is ready to continue. So I started this project, porting the code to a SAX parser.\n\nIn my tests, most pages, even large ones, were finished within 15ms (on node, see below for more information). It works with Rhino, so it runs on [YQL](http://developer.yahoo.com/yql \"Yahoo! Query Language\"), which may have interesting uses. And it works within a browser.\n\nThe Readability extraction algorithm was completely ported, but some adjustments were made:\n\n* `<article>` and `<section>` tags are recognized and gain a higher value\n\n* If a heading is part of the pages `<title>`, it is removed (Readability removed any single `<h2>`, and ignored other tags)\n\n* `henry` and `instapaper-body` are classes to show an algorithm like this where the content is. readabilitySAX recognizes them and adds additional points\n\n* Every bit of code that was taken from the original algorithm was optimized, eg. RegExps should now perform faster (they were optimized & use `RegExp#test` instead of `String#match`, which doesn't force the interpreter to build an array)\n\n* Some improvements made by [GGReadability](https://github.com/curthard89/COCOA-Stuff/tree/master/GGReadability) (an Obj-C port of Readability) were adopted\n    * Images get additional scores when their `height` or `width` attributes are high - icon sized images (<= 32px) get skipped\n    * Additional classes & ids are checked\n\n##HowTo\n###Installing readabilitySAX (node)\nThis module is available on `npm` as `readabilitySAX`. Just run \n\n    npm install readabilitySAX\n\n#####CLI\nA command line interface (CLI) may be installed via\n\n    npm install -g readabilitySAX\n\nIt's then available via\n\n    readability <domain> [<format>]\n\nTo get this readme, just run\n\n    readability https://github.com/FB55/readabilitySAX\n\nThe format is optional (it's either `text` or `html`, the default value is `text`).\n\n###Usage\n#####Node\nJust run `require(\"readabilitySAX\")`. You'll get an object containing three methods:\n\n* `Readability(settings)`: The readability constructor. It works as a handler for `htmlparser2`. Read more about it [in the wiki](https://github.com/FB55/readabilitySAX/wiki/The-Readability-constructor)!\n\n* `WritableStream(settings, cb)`: A constructor that unites `htmlparser2` and the `Readability` constructor. It's a writable stream, so simply `.write` all your data to it. Your callback will be called once `.end` was called. Bonus: You can also `.pipe` data into it!\n\n* `createWritableStream(settings, cb)`: Returns a new instance of the `WritableStream`. (It's a simple factory method.)\n\nThere are two methods available that are deprecated and __will be removed__ in a future version:\n\n* `get(link, [settings], callback)`: Gets a webpage and process it.\n\n* `process(data)`: Takes a string, runs readabilitySAX and returns the page.\n\n__Please don't use those two methods anymore__. Streams are the way you should build interfaces in node, and that's what I want encourage people to use.\n\n#####Browsers\n\nI started to implement simplified SAX-\"parsers\" for Rhino/YQL (using E4X) and the browser (using the DOM) to increase the overall performance on those platforms. The DOM version is inside the `/browsers` dir.\n\nA demo of how to use readabilitySAX inside a browser may be found at [jsFiddle](http://jsfiddle.net/pXqYR/embedded/). Some basic example files are inside the `/browsers` directory.\n\n#####YQL\n\nA table using E4X-based events is available as the community table `redabilitySAX`, as well as [here](https://github.com/FB55/yql-tables/tree/master/readabilitySAX).\n\n##Parsers (on node)\nMost SAX parsers (as sax.js) fail when a document is malformed XML, even if it's correct HTML. readabilitySAX should be used with [htmlparser2](https://github.com/FB55/node-htmlparser), my fork of the `htmlparser`-module (used by eg. `jsdom`), which corrects most faults. It's listed as a dependency, so npm should install it with readabilitySAX.\n\n##Performance\n\n#####Speed\nUsing a package of 724 pages from [CleanEval](http://cleaneval.sigwac.org.uk) (their website seems to be down, try to google it), readabilitySAX processed all of them in 5768 ms, that's an average of 7.97 ms per page.\n\nThe benchmark was done using `tests/benchmark.js` on a MacBook (late 2010) and is probably far from perfect.\n\nPerformance is the main goal of this project. The current speed should be good enough to run readabilitySAX on a singe-threaded web server with an average number of requests. That's an accomplishment!\n\n#####Accuracy\nThe main goal of CleanEval is to evaluate the accuracy of an algorithm.\n\n___// TODO___\n\n##Todo\n\n- Add documentation & examples\n- Add support for URLs containing hash-bangs (`#!`)\n- Allow fetching articles with more than one page\n- Don't remove all images inside `<a>` tags\n- Add a `user-agent` header with usable contents","scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.1.59","description":"the readability script ported to a sax parser","dependencies":{"minreq":"~0.1.6","entities":"0.x.x","htmlparser2":"2.x.x"},"directories":{}},"1.5.2":{"name":"readabilitySAX","version":"1.5.2","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.5.2","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"d1daaa27ce40df3836b453b3ef4d90d3ccc0ebaa","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.5.2.tgz","integrity":"sha512-Vjm08BQna4ImOeZCamEfmgTYseiC3ss7uiGmzJla2mY3aopP22cLY5Uo7ucqyuA3hyPYVXje67iZcnkeI2pbIw==","signatures":[{"sig":"MEUCIQDLwJ68+GX/91LSXrwZLX+xIQ8KiAJH3NDVo32bs6ETKAIgZGLWt5+7vSQChzeV9bI/WVKn7AWPbndwLnWmvN7JcCg=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","readme":"#readabilitySAX\na fast and platform independent readability port\n\n##About\nOne day, I wanted to use [Readability](http://code.google.com/p/arc90labs-readability/), an algorithm to extract relevant pieces of information out of websites, for a node.js project. There are some ports of Readability to node (using jsdom, e.g. [that one](https://github.com/arrix/node-readability)), but they are pretty slow. I don't want to wait for more than a second (literally) until my node instance is ready to continue. So I started this project, porting the code to a SAX parser.\n\nIn my tests, most pages, even large ones, were finished within 15ms (on node, see below for more information). It works with Rhino, so it runs on [YQL](http://developer.yahoo.com/yql \"Yahoo! Query Language\"), which may have interesting uses. And it works within a browser.\n\nThe Readability extraction algorithm was completely ported, but some adjustments were made:\n\n* `<article>` and `<section>` tags are recognized and gain a higher value\n\n* If a heading is part of the pages `<title>`, it is removed (Readability removed any single `<h2>`, and ignored other tags)\n\n* `henry` and `instapaper-body` are classes to show an algorithm like this where the content is. readabilitySAX recognizes them and adds additional points\n\n* Every bit of code that was taken from the original algorithm was optimized, eg. RegExps should now perform faster (they were optimized & use `RegExp#test` instead of `String#match`, which doesn't force the interpreter to build an array)\n\n* Some improvements made by [GGReadability](https://github.com/curthard89/COCOA-Stuff/tree/master/GGReadability) (an Obj-C port of Readability) were adopted\n    * Images get additional scores when their `height` or `width` attributes are high - icon sized images (<= 32px) get skipped\n    * Additional classes & ids are checked\n\n##HowTo\n###Installing readabilitySAX (node)\nThis module is available on `npm` as `readabilitySAX`. Just run \n\n    npm install readabilitySAX\n\n#####CLI\nA command line interface (CLI) may be installed via\n\n    npm install -g readabilitySAX\n\nIt's then available via\n\n    readability <domain> [<format>]\n\nTo get this readme, just run\n\n    readability https://github.com/FB55/readabilitySAX\n\nThe format is optional (it's either `text` or `html`, the default value is `text`).\n\n###Usage\n#####Node\nJust run `require(\"readabilitySAX\")`. You'll get an object containing three methods:\n\n* `Readability(settings)`: The readability constructor. It works as a handler for `htmlparser2`. Read more about it [in the wiki](https://github.com/FB55/readabilitySAX/wiki/The-Readability-constructor)!\n\n* `WritableStream(settings, cb)`: A constructor that unites `htmlparser2` and the `Readability` constructor. It's a writable stream, so simply `.write` all your data to it. Your callback will be called once `.end` was called. Bonus: You can also `.pipe` data into it!\n\n* `createWritableStream(settings, cb)`: Returns a new instance of the `WritableStream`. (It's a simple factory method.)\n\nThere are two methods available that are deprecated and __will be removed__ in a future version:\n\n* `get(link, [settings], callback)`: Gets a webpage and process it.\n\n* `process(data)`: Takes a string, runs readabilitySAX and returns the page.\n\n__Please don't use those two methods anymore__. Streams are the way you should build interfaces in node, and that's what I want encourage people to use.\n\n#####Browsers\n\nI started to implement simplified SAX-\"parsers\" for Rhino/YQL (using E4X) and the browser (using the DOM) to increase the overall performance on those platforms. The DOM version is inside the `/browsers` dir.\n\nA demo of how to use readabilitySAX inside a browser may be found at [jsFiddle](http://jsfiddle.net/pXqYR/embedded/). Some basic example files are inside the `/browsers` directory.\n\n#####YQL\n\nA table using E4X-based events is available as the community table `redabilitySAX`, as well as [here](https://github.com/FB55/yql-tables/tree/master/readabilitySAX).\n\n##Parsers (on node)\nMost SAX parsers (as sax.js) fail when a document is malformed XML, even if it's correct HTML. readabilitySAX should be used with [htmlparser2](https://github.com/FB55/node-htmlparser), my fork of the `htmlparser`-module (used by eg. `jsdom`), which corrects most faults. It's listed as a dependency, so npm should install it with readabilitySAX.\n\n##Performance\n\n#####Speed\nUsing a package of 724 pages from [CleanEval](http://cleaneval.sigwac.org.uk) (their website seems to be down, try to google it), readabilitySAX processed all of them in 5768 ms, that's an average of 7.97 ms per page.\n\nThe benchmark was done using `tests/benchmark.js` on a MacBook (late 2010) and is probably far from perfect.\n\nPerformance is the main goal of this project. The current speed should be good enough to run readabilitySAX on a singe-threaded web server with an average number of requests. That's an accomplishment!\n\n#####Accuracy\nThe main goal of CleanEval is to evaluate the accuracy of an algorithm.\n\n___// TODO___\n\n##Todo\n\n- Add documentation & examples\n- Add support for URLs containing hash-bangs (`#!`)\n- Allow fetching articles with more than one page\n- Don't remove all images inside `<a>` tags","scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.1.59","description":"the readability script ported to a sax parser","dependencies":{"minreq":"~0.1.6","entities":"0.x.x","htmlparser2":"2.x.x"},"directories":{}},"1.5.3":{"name":"readabilitySAX","version":"1.5.3","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"_id":"readabilitySAX@1.5.3","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"./node/CLI.js"},"dist":{"shasum":"a30dec7f1dc4daea5d52b8beac9014553501405a","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.5.3.tgz","integrity":"sha512-6wJnbN5Wql/EKAWXJWaB9EPs6ngxJTGbF1ppjQVye6pqULD8yiRbWn8BxYGP7zwljKvmXu6MPYkpPykvMBCmsg==","signatures":[{"sig":"MEYCIQD+xyQgGrTDWjiTErgOP9muF1grVr2vMv4o+tCZygoYIQIhALA7SXDhMuCsuxfQsn1rxsqWtUTh1KlvVzWZsrjlHHJw","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"./node/index.js","engine":"node >= 0.4.0","readme":"#readabilitySAX\na fast and platform independent readability port\n\n##About\nOne day, I wanted to use [Readability](http://code.google.com/p/arc90labs-readability/), an algorithm to extract relevant pieces of information out of websites, for a node.js project. There are some ports of Readability to node (using jsdom, e.g. [that one](https://github.com/arrix/node-readability)), but they are pretty slow. I don't want to wait for more than a second (literally) until my node instance is ready to continue. So I started this project, porting the code to a SAX parser.\n\nIn my tests, most pages, even large ones, were finished within 15ms (on node, see below for more information). It works with Rhino, so it runs on [YQL](http://developer.yahoo.com/yql \"Yahoo! Query Language\"), which may have interesting uses. And it works within a browser.\n\nThe Readability extraction algorithm was completely ported, but some adjustments were made:\n\n* `<article>` and `<section>` tags are recognized and gain a higher value\n\n* If a heading is part of the pages `<title>`, it is removed (Readability removed any single `<h2>`, and ignored other tags)\n\n* `henry` and `instapaper-body` are classes to show an algorithm like this where the content is. readabilitySAX recognizes them and adds additional points\n\n* Every bit of code that was taken from the original algorithm was optimized, eg. RegExps should now perform faster (they were optimized & use `RegExp#test` instead of `String#match`, which doesn't force the interpreter to build an array)\n\n* Some improvements made by [GGReadability](https://github.com/curthard89/COCOA-Stuff/tree/master/GGReadability) (an Obj-C port of Readability) were adopted\n    * Images get additional scores when their `height` or `width` attributes are high - icon sized images (<= 32px) get skipped\n    * Additional classes & ids are checked\n\n##HowTo\n###Installing readabilitySAX (node)\nThis module is available on `npm` as `readabilitySAX`. Just run \n\n    npm install readabilitySAX\n\n#####CLI\nA command line interface (CLI) may be installed via\n\n    npm install -g readabilitySAX\n\nIt's then available via\n\n    readability <domain> [<format>]\n\nTo get this readme, just run\n\n    readability https://github.com/FB55/readabilitySAX\n\nThe format is optional (it's either `text` or `html`, the default value is `text`).\n\n###Usage\n#####Node\nJust run `require(\"readabilitySAX\")`. You'll get an object containing three methods:\n\n* `Readability(settings)`: The readability constructor. It works as a handler for `htmlparser2`. Read more about it [in the wiki](https://github.com/FB55/readabilitySAX/wiki/The-Readability-constructor)!\n\n* `WritableStream(settings, cb)`: A constructor that unites `htmlparser2` and the `Readability` constructor. It's a writable stream, so simply `.write` all your data to it. Your callback will be called once `.end` was called. Bonus: You can also `.pipe` data into it!\n\n* `createWritableStream(settings, cb)`: Returns a new instance of the `WritableStream`. (It's a simple factory method.)\n\nThere are two methods available that are deprecated and __will be removed__ in a future version:\n\n* `get(link, [settings], callback)`: Gets a webpage and process it.\n\n* `process(data)`: Takes a string, runs readabilitySAX and returns the page.\n\n__Please don't use those two methods anymore__. Streams are the way you should build interfaces in node, and that's what I want encourage people to use.\n\n#####Browsers\n\nI started to implement simplified SAX-\"parsers\" for Rhino/YQL (using E4X) and the browser (using the DOM) to increase the overall performance on those platforms. The DOM version is inside the `/browsers` dir.\n\nA demo of how to use readabilitySAX inside a browser may be found at [jsFiddle](http://jsfiddle.net/pXqYR/embedded/). Some basic example files are inside the `/browsers` directory.\n\n#####YQL\n\nA table using E4X-based events is available as the community table `redabilitySAX`, as well as [here](https://github.com/FB55/yql-tables/tree/master/readabilitySAX).\n\n##Parsers (on node)\nMost SAX parsers (as sax.js) fail when a document is malformed XML, even if it's correct HTML. readabilitySAX should be used with [htmlparser2](https://github.com/FB55/node-htmlparser), my fork of the `htmlparser`-module (used by eg. `jsdom`), which corrects most faults. It's listed as a dependency, so npm should install it with readabilitySAX.\n\n##Performance\n\n#####Speed\nUsing a package of 724 pages from [CleanEval](http://cleaneval.sigwac.org.uk) (their website seems to be down, try to google it), readabilitySAX processed all of them in 5768 ms, that's an average of 7.97 ms per page.\n\nThe benchmark was done using `tests/benchmark.js` on a MacBook (late 2010) and is probably far from perfect.\n\nPerformance is the main goal of this project. The current speed should be good enough to run readabilitySAX on a singe-threaded web server with an average number of requests. That's an accomplishment!\n\n#####Accuracy\nThe main goal of CleanEval is to evaluate the accuracy of an algorithm.\n\n___// TODO___\n\n##Todo\n\n- Add documentation & examples\n- Add support for URLs containing hash-bangs (`#!`)\n- Allow fetching articles with more than one page\n- Don't remove all images inside `<a>` tags","scripts":{"test":"node ./tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.1.65","description":"the readability script ported to a sax parser","dependencies":{"minreq":"~0.1.6","entities":"0.x.x","htmlparser2":"2.x.x"},"readmeFilename":"readme.md","directories":{}},"1.6.0":{"name":"readabilitySAX","version":"1.6.0","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"license":"BSD-like","_id":"readabilitySAX@1.6.0","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"node/CLI.js"},"dist":{"shasum":"0a9b3c8bdf58731d89927ba80966dce5acf45a22","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.6.0.tgz","integrity":"sha512-evHLpw/LURv3VGn6nChLFoxdGzluT6i/VFOLk4RH6HNUF2g6EtdXv/8tq6pHNcggfV1Ru+3X5bTZqALnkTNduA==","signatures":[{"sig":"MEUCIHCGMdlZvlWS8WtBmhf6mP2DlRflQ4O2iGBHDfEx2s9vAiEAv+1GLXhEDyaRoTL1bg9B3GV4eimhwwtmcoJ/UnCIYnY=","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"node/index.js","_from":".","readme":"#readabilitySAX\na fast and platform independent readability port\n\n##About\nOne day, I wanted to use [Readability](http://code.google.com/p/arc90labs-readability/), an algorithm to extract relevant pieces of information out of websites, for a node.js project. There are some ports of Readability to node (using jsdom, e.g. [that one](https://github.com/arrix/node-readability)), but they are pretty slow. I don't want to wait for more than a second (literally) until my node instance is ready to continue. So I started this project, porting the code to a SAX parser.\n\nIn my tests, most pages, even large ones, were finished within 15ms (on node, see below for more information). It works with Rhino, so it runs on [YQL](http://developer.yahoo.com/yql \"Yahoo! Query Language\"), which may have interesting uses. And it works within a browser.\n\nThe Readability extraction algorithm was completely ported, but some adjustments were made:\n\n* `<article>` and `<section>` tags are recognized and gain a higher value\n\n* If a heading is part of the pages `<title>`, it is removed (Readability removed any single `<h2>`, and ignored other tags)\n\n* `henry` and `instapaper-body` are classes to show an algorithm like this where the content is. readabilitySAX recognizes them and adds additional points\n\n* Every bit of code that was taken from the original algorithm was optimized, eg. RegExps should now perform faster (they were optimized & use `RegExp#test` instead of `String#match`, which doesn't force the interpreter to build an array)\n\n* Some improvements made by [GGReadability](https://github.com/curthard89/COCOA-Stuff/tree/master/GGReadability) (an Obj-C port of Readability) were adopted\n    * Images get additional scores when their `height` or `width` attributes are high - icon sized images (<= 32px) get skipped\n    * Additional classes & ids are checked\n\n##HowTo\n###Installing readabilitySAX (node)\nThis module is available on `npm` as `readabilitySAX`. Just run \n\n    npm install readabilitySAX\n\n#####CLI\nA command line interface (CLI) may be installed via\n\n    npm install -g readabilitySAX\n\nIt's then available via\n\n    readability <domain> [<format>]\n\nTo get this readme, just run\n\n    readability https://github.com/FB55/readabilitySAX\n\nThe format is optional (it's either `text` or `html`, the default value is `text`).\n\n###Usage\n#####Node\nJust run `require(\"readabilitySAX\")`. You'll get an object containing three methods:\n\n* `Readability(settings)`: The readability constructor. It works as a handler for `htmlparser2`. Read more about it [in the wiki](https://github.com/FB55/readabilitySAX/wiki/The-Readability-constructor)!\n\n* `WritableStream(settings, cb)`: A constructor that unites `htmlparser2` and the `Readability` constructor. It's a writable stream, so simply `.write` all your data to it. Your callback will be called once `.end` was called. Bonus: You can also `.pipe` data into it!\n\n* `createWritableStream(settings, cb)`: Returns a new instance of the `WritableStream`. (It's a simple factory method.)\n\nThere are two methods available that are deprecated and __will be removed__ in a future version:\n\n* `get(link, [settings], callback)`: Gets a webpage and process it.\n\n* `process(data)`: Takes a string, runs readabilitySAX and returns the page.\n\n__Please don't use those two methods anymore__. Streams are the way you should build interfaces in node, and that's what I want encourage people to use.\n\n#####Browsers\n\nI started to implement simplified SAX-\"parsers\" for Rhino/YQL (using E4X) and the browser (using the DOM) to increase the overall performance on those platforms. The DOM version is inside the `/browsers` dir.\n\nA demo of how to use readabilitySAX inside a browser may be found at [jsFiddle](http://jsfiddle.net/pXqYR/embedded/). Some basic example files are inside the `/browsers` directory.\n\n#####YQL\n\nA table using E4X-based events is available as the community table `redabilitySAX`, as well as [here](https://github.com/FB55/yql-tables/tree/master/readabilitySAX).\n\n##Parsers (on node)\nMost SAX parsers (as sax.js) fail when a document is malformed XML, even if it's correct HTML. readabilitySAX should be used with [htmlparser2](https://github.com/FB55/node-htmlparser), my fork of the `htmlparser`-module (used by eg. `jsdom`), which corrects most faults. It's listed as a dependency, so npm should install it with readabilitySAX.\n\n##Performance\n\n#####Speed\nUsing a package of 724 pages from [CleanEval](http://cleaneval.sigwac.org.uk) (their website seems to be down, try to google it), readabilitySAX processed all of them in 5768 ms, that's an average of 7.97 ms per page.\n\nThe benchmark was done using `tests/benchmark.js` on a MacBook (late 2010) and is probably far from perfect.\n\nPerformance is the main goal of this project. The current speed should be good enough to run readabilitySAX on a singe-threaded web server with an average number of requests. That's an accomplishment!\n\n#####Accuracy\nThe main goal of CleanEval is to evaluate the accuracy of an algorithm.\n\n___// TODO___\n\n##Todo\n\n- Add documentation & examples\n- Add support for URLs containing hash-bangs (`#!`)\n- Allow fetching articles with more than one page\n- Don't remove all images inside `<a>` tags","scripts":{"test":"node tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.2.15","description":"the readability script ported to a sax parser","dependencies":{"minreq":"0.2","entities":"0","htmlparser2":"3.0","readable-stream":"1.0"},"readmeFilename":"readme.md","directories":{}},"1.6.1":{"name":"readabilitySAX","version":"1.6.1","keywords":["html","content extraction","readability","instapaper"],"author":{"name":"Felix Boehm","email":"me@feedic.com"},"license":"BSD-like","_id":"readabilitySAX@1.6.1","maintainers":[{"name":"feedic","email":"me@feedic.com"}],"bin":{"readability":"node/CLI.js"},"dist":{"shasum":"4a040cdeeb52c62d7774da0e1b9708b72cebbd00","tarball":"https://registry.npmjs.org/readabilitySAX/-/readabilitySAX-1.6.1.tgz","integrity":"sha512-Lwu2VN5mM/rvzqxanFID8P7XnoYFn0H37fnQnBeBANRVNJ1TiNJClbambqo2fBMtda/bj//0Shb5MSxTLCtkeQ==","signatures":[{"sig":"MEYCIQCYQTwQMmBquY/kga1qfVHIPShBUY3LwBDv8giwlZEYpQIhAK5EqCe/ZzSICbPgiolBWtxrPnSUeAbtTw0ERDfJqu5+","keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA"}]},"main":"node/index.js","_from":".","readme":"#readabilitySAX\na fast and platform independent readability port\n\n##About\nThis is a port of the algorithm used by the [Readability](http://code.google.com/p/arc90labs-readability/) bookmarklet to extract relevant pieces of information out of websites to a SAX parser.\n\nThe advantage over other ports, e.g. [arrix/node-readability](https://github.com/arrix/node-readability), is a smaller memory footprint and a much faster execution. In my tests, most pages, even large ones, were finished within 15ms (on node, see below for more information). It works with Rhino, so it runs on [YQL](http://developer.yahoo.com/yql \"Yahoo! Query Language\"), which may have interesting uses. And it works within a browser.\n\nThe Readability extraction algorithm was completely ported, but some adjustments were made:\n\n* `<article>` and `<section>` tags are recognized and gain a higher value\n\n* If a heading is part of the pages `<title>`, it is removed (Readability removed any single `<h2>`, and ignored other tags)\n\n* `henry` and `instapaper-body` are classes to show an algorithm like this where the content is. readabilitySAX recognizes them and adds additional points\n\n* Every bit of code that was taken from the original algorithm was optimized, eg. RegExps should now perform faster (they were optimized & use `RegExp#test` instead of `String#match`, which doesn't force the interpreter to build an array)\n\n* Some improvements made by [GGReadability](https://github.com/curthard89/COCOA-Stuff/tree/master/GGReadability) (an Obj-C port of Readability) were adopted\n    * Images get additional scores when their `height` or `width` attributes are high - icon sized images (<= 32px) get skipped\n    * Additional classes & ids are checked\n\n##HowTo\n###Installing readabilitySAX (node)\nThis module is available on `npm` as `readabilitySAX`. Just run \n\n    npm install readabilitySAX\n\n#####CLI\nA command line interface (CLI) may be installed via\n\n    npm install -g readabilitySAX\n\nIt's then available via\n\n    readability <domain> [<format>]\n\nTo get this readme, just run\n\n    readability https://github.com/FB55/readabilitySAX\n\nThe format is optional (it's either `text` or `html`, the default value is `text`).\n\n###Usage\n#####Node\nJust run `require(\"readabilitySAX\")`. You'll get an object containing three methods:\n\n* `Readability(settings)`: The readability constructor. It works as a handler for `htmlparser2`. Read more about it [in the wiki](https://github.com/FB55/readabilitySAX/wiki/The-Readability-constructor)!\n\n* `WritableStream(settings, cb)`: A constructor that unites `htmlparser2` and the `Readability` constructor. It's a writable stream, so simply `.write` all your data to it. Your callback will be called once `.end` was called. Bonus: You can also `.pipe` data into it!\n\n* `createWritableStream(settings, cb)`: Returns a new instance of the `WritableStream`. (It's a simple factory method.)\n\nThere are two methods available that are deprecated and __will be removed__ in a future version:\n\n* `get(link, [settings], callback)`: Gets a webpage and process it.\n\n* `process(data)`: Takes a string, runs readabilitySAX and returns the page.\n\n__Please don't use those two methods anymore__. Streams are the way you should build interfaces in node, and that's what I want encourage people to use.\n\n#####Browsers\n\nI started to implement simplified SAX-\"parsers\" for Rhino/YQL (using E4X) and the browser (using the DOM) to increase the overall performance on those platforms. The DOM version is inside the `/browsers` dir.\n\nA demo of how to use readabilitySAX inside a browser may be found at [jsFiddle](http://jsfiddle.net/pXqYR/embedded/). Some basic example files are inside the `/browsers` directory.\n\n#####YQL\n\nA table using E4X-based events is available as the community table `redabilitySAX`, as well as [here](https://github.com/FB55/yql-tables/tree/master/readabilitySAX).\n\n##Parsers (on node)\nMost SAX parsers (as sax.js) fail when a document is malformed XML, even if it's correct HTML. readabilitySAX should be used with [htmlparser2](http://npm.im/htmlparser2), my fork of the `htmlparser`-module (used by eg. `jsdom`), which corrects most faults. It's listed as a dependency, so npm should install it with readabilitySAX.\n\n##Performance\n\n#####Speed\nUsing a package of 724 pages from [CleanEval](http://cleaneval.sigwac.org.uk) (their website seems to be down, try to google it), readabilitySAX processed all of them in 5768 ms, that's an average of 7.97 ms per page.\n\nThe benchmark was done using `tests/benchmark.js` on a MacBook (late 2010) and is probably far from perfect.\n\nPerformance is the main goal of this project. The current speed should be good enough to run readabilitySAX on a singe-threaded web server with an average number of requests. That's an accomplishment!\n\n#####Accuracy\nThe main goal of CleanEval is to evaluate the accuracy of an algorithm.\n\n___// TODO___\n\n##Todo\n\n- Add documentation & examples\n- Add support for URLs containing hash-bangs (`#!`)\n- Allow fetching articles with more than one page\n- Don't remove all images inside `<a>` tags\n","scripts":{"test":"node tests/test_output.js"},"_npmUser":{"name":"feedic","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"_npmVersion":"1.2.17","description":"the readability script ported to a sax parser","dependencies":{"minreq":"0.2","entities":"0","htmlparser2":"3.0","readable-stream":"1.0"},"readmeFilename":"readme.md","directories":{}}},"time":{"created":"2011-07-31T20:56:03.476Z","modified":"2026-02-21T19:30:49.812Z","0.1.0":"2011-07-31T20:56:04.520Z","0.1.1":"2011-08-06T19:52:35.975Z","0.2.0":"2011-08-28T11:34:03.868Z","0.2.1":"2011-08-28T18:09:00.876Z","0.2.2":"2011-09-07T18:00:11.440Z","0.3.0":"2011-10-21T12:44:29.772Z","0.3.1":"2011-10-21T13:37:26.711Z","0.3.2":"2011-10-21T13:42:46.471Z","0.3.3":"2011-11-05T19:04:46.960Z","0.5.0":"2011-11-27T15:39:38.242Z","0.6.0":"2011-11-30T15:58:52.625Z","0.6.1":"2011-12-02T15:32:55.288Z","1.0.0":"2011-12-16T12:12:58.215Z","1.0.1":"2011-12-16T18:53:26.846Z","1.0.2":"2011-12-17T18:11:56.935Z","1.0.3":"2011-12-20T17:30:27.274Z","1.0.4":"2011-12-20T18:35:47.558Z","1.0.5":"2011-12-21T15:45:39.078Z","1.1.0":"2011-12-31T17:32:14.749Z","1.2.0":"2012-01-07T15:11:45.988Z","1.2.1":"2012-01-07T15:16:13.417Z","1.2.2":"2012-01-07T15:44:53.905Z","1.3.0":"2012-01-13T16:25:35.294Z","1.3.1":"2012-01-14T17:33:42.228Z","1.3.2":"2012-01-19T10:07:28.455Z","1.3.3":"2012-01-20T16:28:42.199Z","1.4.0":"2012-02-10T14:23:57.106Z","1.4.1":"2012-02-18T11:29:55.779Z","1.4.2":"2012-03-04T13:06:22.445Z","1.4.3":"2012-03-15T16:09:35.983Z","1.4.4":"2012-09-09T13:10:17.988Z","1.4.5":"2012-09-21T12:24:27.395Z","1.5.1":"2012-09-21T14:23:00.382Z","1.5.2":"2012-09-21T14:53:45.036Z","1.5.3":"2012-11-10T15:16:08.639Z","1.6.0":"2013-04-04T15:24:44.393Z","1.6.1":"2013-04-09T08:44:19.067Z"},"author":{"name":"Felix Boehm","email":"me@feedic.com"},"repository":{"url":"git://github.com/fb55/readabilitysax.git","type":"git"},"description":"the readability script ported to a sax parser","keywords":["html","content extraction","readability","instapaper"],"license":"BSD-like","maintainers":[{"email":"me@feedic.com","name":"fb55"}],"readme":"ERROR: No README data found!"}