{"_id":"grapheme-splitter","_rev":"6-74b7f107a42ae9c331f6e185593883b5","name":"grapheme-splitter","description":"A JavaScript library that breaks strings into their individual user-perceived characters. It supports emojis!","dist-tags":{"latest":"1.0.4"},"versions":{"1.0.1":{"name":"grapheme-splitter","version":"1.0.1","description":"A JavaScipt library that breaks strings into their individual user-perceived characters. It supports emojis!","homepage":"https://github.com/orling/grapheme-splitter","author":{"name":"Orlin Georgiev"},"contributors":[{"name":"Lucas Tadeu Teixeira","email":"lucas@fastmail.nl","url":"https://lucas.is"}],"main":"index.js","license":"MIT","keywords":["utf-8","strings","emoji","split"],"scripts":{"test":"tape tests/grapheme_splitter_tests.js"},"repository":{"type":"git","url":"git+https://github.com/orling/grapheme-splitter.git"},"bugs":{"url":"https://github.com/orling/grapheme-splitter/issues"},"dependencies":{},"devDependencies":{"tape":"^4.6.3"},"engines":{"npm":"~7.3.0"},"gitHead":"27cbe6a2606b80f5322b735947ca4b5dd133d915","_id":"grapheme-splitter@1.0.1","_shasum":"445abeddfab3e4a250049978d38990e3c4bd1a7f","_from":".","_npmVersion":"3.10.10","_nodeVersion":"6.11.0","_npmUser":{"name":"orling","email":"orlin.georgiev@gmail.com"},"dist":{"shasum":"445abeddfab3e4a250049978d38990e3c4bd1a7f","tarball":"https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.1.tgz","integrity":"sha512-bIPvRVpzZGK4e8Xh6y2gGQjqXdtDrBJV1AwHVl4FdA+U3ObIQHhDMaS9MPKbO6SElZ0F7BWRuIEfMNUK+2X2kQ==","signatures":[{"keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA","sig":"MEYCIQCIjOwWijQAWcULTJ4L3IUfzxfio2EoLUY18/3WyFSHcQIhAMHqkgcc4kXg2M0vICIy9uM5klgsX4LCXgOuuE+D5spp"}]},"maintainers":[{"name":"orling","email":"orlin.georgiev@gmail.com"}],"_npmOperationalInternal":{"host":"s3://npm-registry-packages","tmp":"tmp/grapheme-splitter-1.0.1.tgz_1500583030522_0.4711209151428193"},"directories":{}},"1.0.2":{"name":"grapheme-splitter","version":"1.0.2","description":"A JavaScipt library that breaks strings into their individual user-perceived characters. It supports emojis!","homepage":"https://github.com/orling/grapheme-splitter","author":{"name":"Orlin Georgiev"},"contributors":[{"name":"Lucas Tadeu Teixeira","email":"lucas@fastmail.nl","url":"https://lucas.is"}],"main":"index.js","license":"MIT","keywords":["utf-8","strings","emoji","split"],"scripts":{"test":"tape tests/grapheme_splitter_tests.js"},"repository":{"type":"git","url":"git+https://github.com/orling/grapheme-splitter.git"},"bugs":{"url":"https://github.com/orling/grapheme-splitter/issues"},"dependencies":{},"devDependencies":{"tape":"^4.6.3"},"engines":{"npm":"~7.3.0"},"gitHead":"663679c01d7576470a3b5889a8ea166fb57f10a0","_id":"grapheme-splitter@1.0.2","_shasum":"639e9dc1bf065892c643de31daa27cf58b1068e2","_from":".","_npmVersion":"3.10.10","_nodeVersion":"6.11.0","_npmUser":{"name":"orling","email":"orlin.georgiev@gmail.com"},"dist":{"shasum":"639e9dc1bf065892c643de31daa27cf58b1068e2","tarball":"https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.2.tgz","integrity":"sha512-38Gi2nMSgddKyyh9fpsX4AhyOsy4BVxLBypTMLL8kktJg6Agqsbo4xMahExg4X4g7r0PEGMTgtvAydJx6jfp1A==","signatures":[{"keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA","sig":"MEQCIBFHLRIXP60nSPjl/xcXcau81qWyJCPwmohUuOliKULQAiBrpFSm+H/DLDMwQovRIibymGSdKSCk7eud7mKt/2OCRQ=="}]},"maintainers":[{"name":"orling","email":"orlin.georgiev@gmail.com"}],"_npmOperationalInternal":{"host":"s3://npm-registry-packages","tmp":"tmp/grapheme-splitter-1.0.2.tgz_1500583853152_0.11804638826288283"},"directories":{}},"1.0.3":{"name":"grapheme-splitter","version":"1.0.3","description":"A JavaScipt library that breaks strings into their individual user-perceived characters. It supports emojis!","homepage":"https://github.com/orling/grapheme-splitter","author":{"name":"Orlin Georgiev"},"contributors":[{"name":"Lucas Tadeu Teixeira","email":"lucas@fastmail.nl","url":"https://lucas.is"}],"main":"index.js","license":"MIT","keywords":["utf-8","strings","emoji","split"],"scripts":{"test":"tape tests/grapheme_splitter_tests.js"},"repository":{"type":"git","url":"git+https://github.com/orling/grapheme-splitter.git"},"bugs":{"url":"https://github.com/orling/grapheme-splitter/issues"},"dependencies":{},"devDependencies":{"tape":"^4.6.3"},"gitHead":"642ea8d2e6f26156e90ff8e8bcb00af0676b97ec","_id":"grapheme-splitter@1.0.3","_npmVersion":"6.4.0","_nodeVersion":"6.11.0","_npmUser":{"name":"orling","email":"orlin.georgiev@gmail.com"},"dist":{"integrity":"sha512-RuZjCNyPGjjRGBB5UzYFeQqdmcpIJmlBnPm3tv2uFpebGJVnwc+zaiuYBdo3XYoMMYSQUGx2ixGDFzNxYUQzfg==","shasum":"6ffffdd44311862ada843f9cd3e7d05eda9f411c","tarball":"https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.3.tgz","fileCount":7,"unpackedSize":236656,"npm-signature":"-----BEGIN PGP SIGNATURE-----\r\nVersion: OpenPGP.js v3.0.4\r\nComment: https://openpgpjs.org\r\n\r\nwsFcBAEBCAAQBQJbfjQZCRA9TVsSAnZWagAAw3IP/i8rfJBw03aojFnDjW4w\nG1lgVxEoxHFOc6uXoT0LATwdZhQTb7+3AHujxP//vqAwXNGchAMfvfRhGsdK\nm7dfdA+Yu0vrLqdKt62SBGiJOy+m2Ocdyk76s58G5JKaJaXOD+sBFFWbC8dS\ndWN065HloL65eDJz9BQvhG4WgKSEHhI2v1/rEvj7hwMHBwPNHICmimLWcSRl\nRAhbXj4xL0R9VCnwZo557zJFF4wkVU14xdswvf5QwT5KUoOGigyy8o0deN20\nk4Ivn5jXH51UOgiVT2xBkq+EfI68IxUbnI7c2l6s61hjvR6xLIMeQZZyaCJI\nSrmShf4l53ntPlT72JbV/UcYcGiyg0X53g15XIY8ScLKvtCSsiB1VC+6rSpD\nwjf2JK2tiNhq6/wy39X8Jb5Mn56lw6jmQpfFCSZN+++1u2dmFvlMcztg0K0D\n5wM5L4dwoodArxv2akYSPJvW0mufL0Hl2zOG6zZBast/GXLvzlwMqUo6tRzG\nEBIAMjo7ggOyKNkpbXlp/CG4flVX6BVPck5dexn8Rq9hsUcViPAybX4ulg+W\n6Qkp916QcEp2Bc89NngQIp6JPQdXjsPUCp8mgmrKCKo2szZe3PWqk2USuwnw\nFq+Wc97r/XkshTh2xrvhQ10KvAsf/ucsY4W+erFI+RvNJGQuALm6CYmOvab2\nBdhT\r\n=bENx\r\n-----END PGP SIGNATURE-----\r\n","signatures":[{"keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA","sig":"MEQCIFfVTPmA4XUOuCwzbTzfh0SJhXu8IYeZMKjdv5a/pyr1AiBSgHhMCB2gFzDCbBg3KjZaJe2xFel1NqQpeXuIEQckJQ=="}]},"maintainers":[{"name":"orling","email":"orlin.georgiev@gmail.com"}],"directories":{},"_npmOperationalInternal":{"host":"s3://npm-registry-packages","tmp":"tmp/grapheme-splitter_1.0.3_1534997528574_0.18999743495129384"},"_hasShrinkwrap":false},"1.0.4":{"name":"grapheme-splitter","version":"1.0.4","description":"A JavaScript library that breaks strings into their individual user-perceived characters. It supports emojis!","homepage":"https://github.com/orling/grapheme-splitter","author":{"name":"Orlin Georgiev"},"contributors":[{"name":"Lucas Tadeu Teixeira","email":"lucas@fastmail.nl","url":"https://lucas.is"}],"main":"index.js","license":"MIT","keywords":["utf-8","strings","emoji","split"],"scripts":{"test":"tape tests/grapheme_splitter_tests.js"},"repository":{"type":"git","url":"git+https://github.com/orling/grapheme-splitter.git"},"bugs":{"url":"https://github.com/orling/grapheme-splitter/issues"},"dependencies":{},"devDependencies":{"tape":"^4.6.3"},"gitHead":"0609d90dcbc93b42d8ceebb7aec0eda38b5d916d","_id":"grapheme-splitter@1.0.4","_npmVersion":"6.4.0","_nodeVersion":"6.11.0","_npmUser":{"name":"orling","email":"orlin.georgiev@gmail.com"},"dist":{"integrity":"sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ==","shasum":"9cf3a665c6247479896834af35cf1dbb4400767e","tarball":"https://registry.npmjs.org/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz","fileCount":7,"unpackedSize":236662,"npm-signature":"-----BEGIN PGP SIGNATURE-----\r\nVersion: OpenPGP.js v3.0.4\r\nComment: https://openpgpjs.org\r\n\r\nwsFcBAEBCAAQBQJbmCTICRA9TVsSAnZWagAAL2EQAIhZzGmSDAcAd9glTwr9\nCMCivJmTjCV0I/zOIDIGGQJPw42MypzJ/ndRqXq/kobzG7Fv6MD078t3BhyH\nWPDG9Emj0BY4vbyV097piZXsC80RFqKeaGs+iRtktMMf6W9SsgcrP3fgX7Q4\nClVgWeZygALx9OyUj8sVQdBHJBccZavYyeDVMMmWxmcUpwvkGxiyOx8ckHHE\nFYxxBIuBUn08ofFJBNpQYQOOJgxp666ONlg9Xbe71CfeaM8P3epj5najQbaa\nmJ8eOuVimV/7PAQdn+auDPh7y1CpnN6yDwKR9leFHg/ANuhz2IpPUU3WH5p9\nDrjOLhsbZ2xqot5BE9qA5oHnJnTZG+n1a9thRfbiKzSkSIEDZZLAaLVjdPXX\nugm5E+dxkNDwpocjh9TGA4DnBopM3S+nXQHoFpq/u0585NPt/2YNuRS9YMxS\nIv6oPCEBXq3E534VGztOyHhYJh5eakldGlFHYmO1EjoM/zq9rQvkP6BGPLLw\n+WoKimMioD/ukmm6qztnaYayvHPUlsc0A0g6tiNDgpjDr23KkH2PCd3PgM6l\nwky0MBSa005CySy6dWqogseR4FTJxQTvVNhtd8G1O+PXaERE2OfEHftxOpgg\ngMF+J+hQkKi+2N6ccTIo4833rddWeZiJXFerTOJUpU2hmLurt8XCuNrWYcTi\nzWs0\r\n=UKW6\r\n-----END PGP SIGNATURE-----\r\n","signatures":[{"keyid":"SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA","sig":"MEYCIQC5sv+DdX2DwiDMDLZsvPCtosj/zx1c29SwCnafUH6LxAIhAIYRFpLudH2DFB862j+5N0AFJnoOoyo5zNRwP1bHpsbO"}]},"maintainers":[{"name":"orling","email":"orlin.georgiev@gmail.com"}],"directories":{},"_npmOperationalInternal":{"host":"s3://npm-registry-packages","tmp":"tmp/grapheme-splitter_1.0.4_1536697543257_0.5835726576037328"},"_hasShrinkwrap":false}},"readme":"# Background\r\n\r\nIn JavaScript there is not always a one-to-one relationship between string characters and what a user would call a separate visual \"letter\". Some symbols are represented by several characters. This can cause issues when splitting strings and inadvertently cutting a multi-char letter in half, or when you need the actual number of letters in a string.\r\n\r\nFor example, emoji characters like \"🌷\",\"🎁\",\"💩\",\"😜\" and \"👍\" are represented by two JavaScript characters each (high surrogate and low surrogate). That is, \r\n\r\n```javascript\r\n\"🌷\".length == 2\r\n```\r\nThe combined emoji are even longer:\r\n```javascript\r\n\"🏳️‍🌈\".length == 6\r\n```\r\n\r\nWhat's more, some languages often include combining marks - characters that are used to modify the letters before them. Common examples are the German letter ü and the Spanish letter ñ. Sometimes they can be represented alternatively both as a single character and as a letter + combining mark, with both forms equally valid:\r\n    \r\n```javascript\r\nvar two = \"ñ\"; // unnormalized two-char n+◌̃  , i.e. \"\\u006E\\u0303\";\r\nvar one = \"ñ\"; // normalized single-char, i.e. \"\\u00F1\"\r\nconsole.log(one!=two); // prints 'true'\r\n```\r\n\r\nUnicode normalization, as performed by the popular punycode.js library or ECMAScript 6's String.normalize, can **sometimes** fix those differences and turn two-char sequences into single characters. But it is **not** enough in all cases. Some languages like Hindi make extensive use of combining marks on their letters, that have no dedicated single-codepoint Unicode sequences, due to the sheer number of possible combinations.\r\nFor example, the Hindi word \"अनुच्छेद\" is comprised of 5 letters and 3 combining marks:\r\n\r\nअ + न + ु + च + ् + छ + े + द\r\n\r\nwhich is in fact just 5 user-perceived letters:\r\n\r\nअ + नु + च् + छे + द\r\n\r\nand which Unicode normalization would not combine properly.\r\nThere are also the unusual letter+combining mark combinations which have no dedicated Unicode codepoint. The string Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘ obviously has 5 separate letters, but is in fact comprised of 58 JavaScript characters, most of which are combining marks.\r\n\r\nEnter the grapheme-splitter.js library. It can be used to properly split JavaScript strings into what a human user would call separate letters (or \"extended grapheme clusters\" in Unicode terminology), no matter what their internal representation is. It is an implementation on the [Default Grapheme Cluster Boundary](http://unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table) of [UAX #29](http://www.unicode.org/reports/tr29/). \r\n\r\n# Installation\r\n\r\nYou can use the index.js file directly as-is. Or you you can install `grapheme-splitter` to your project using the NPM command below:\r\n\r\n```\r\n$ npm install --save grapheme-splitter\r\n```\r\n\r\n# Tests\r\n\r\nTo run the tests on `grapheme-splitter`, use the command below:\r\n\r\n```\r\n$ npm test\r\n```\r\n\r\n# Usage\r\n\r\nJust initialize and use:\r\n\r\n```javascript\r\nvar splitter = new GraphemeSplitter();\r\n\r\n// split the string to an array of grapheme clusters (one string each)\r\nvar graphemes = splitter.splitGraphemes(string);\r\n\r\n// iterate the string to an iterable iterator of grapheme clusters (one string each)\r\nvar graphemes = splitter.iterateGraphemes(string);\r\n\r\n// or do this if you just need their number\r\nvar graphemeCount = splitter.countGraphemes(string);\r\n```\r\n\r\n# Examples\r\n\r\n```javascript\r\nvar splitter = new GraphemeSplitter();\r\n\r\n// plain latin alphabet - nothing spectacular\r\nsplitter.splitGraphemes(\"abcd\"); // returns [\"a\", \"b\", \"c\", \"d\"]\r\n\r\n// two-char emojis and six-char combined emoji\r\nsplitter.splitGraphemes(\"🌷🎁💩😜👍🏳️‍🌈\"); // returns [\"🌷\",\"🎁\",\"💩\",\"😜\",\"👍\",\"🏳️‍🌈\"]\r\n\r\n// diacritics as combining marks, 10 JavaScript chars\r\nsplitter.splitGraphemes(\"Ĺo͂řȩm̅\"); // returns [\"Ĺ\",\"o͂\",\"ř\",\"ȩ\",\"m̅\"]\r\n\r\n// individual Korean characters (Jamo), 4 JavaScript chars\r\nsplitter.splitGraphemes(\"뎌쉐\"); // returns [\"뎌\",\"쉐\"]\r\n\r\n// Hindi text with combining marks, 8 JavaScript chars\r\nsplitter.splitGraphemes(\"अनुच्छेद\"); // returns [\"अ\",\"नु\",\"च्\",\"छे\",\"द\"]\r\n\r\n// demonic multiple combining marks, 75 JavaScript chars\r\nsplitter.splitGraphemes(\"Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞\"); // returns [\"Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍\",\"A̴̵̜̰͔ͫ͗͢\",\"L̠ͨͧͩ͘\",\"G̴̻͈͍͔̹̑͗̎̅͛́\",\"Ǫ̵̹̻̝̳͂̌̌͘\",\"!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞\"]\r\n```\r\n\r\n# TypeScript\r\n\r\nGrapheme splitter includes TypeScript declarations.\r\n\r\n```typescript\r\nimport GraphemeSplitter = require('grapheme-splitter')\r\n\r\nconst splitter = new GraphemeSplitter()\r\n\r\nconst split: string[] = splitter.splitGraphemes('Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞')\r\n```\r\n\r\n# Acknowledgements\r\n\r\nThis library is heavily influenced by Devon Govett's excellent grapheme-breaker CoffeeScript library at https://github.com/devongovett/grapheme-breaker with an emphasis on ease of integration and pure JavaScript implementation.\r\n\r\n\r\n\r\n","maintainers":[{"name":"orling","email":"orlin.georgiev@gmail.com"}],"time":{"modified":"2022-06-18T13:52:11.348Z","created":"2017-07-20T20:37:11.585Z","1.0.1":"2017-07-20T20:37:11.585Z","1.0.2":"2017-07-20T20:50:54.145Z","1.0.3":"2018-08-23T04:12:08.802Z","1.0.4":"2018-09-11T20:25:43.410Z"},"homepage":"https://github.com/orling/grapheme-splitter","keywords":["utf-8","strings","emoji","split"],"repository":{"type":"git","url":"git+https://github.com/orling/grapheme-splitter.git"},"contributors":[{"name":"Lucas Tadeu Teixeira","email":"lucas@fastmail.nl","url":"https://lucas.is"}],"author":{"name":"Orlin Georgiev"},"bugs":{"url":"https://github.com/orling/grapheme-splitter/issues"},"license":"MIT","readmeFilename":"README.md"}