In JavaScript, I am using split(/\W+/) to split a string into words.
When I split the following string, it returns the wrong result:
// Split the phrase on runs of non-word characters. Note that \W treats
// everything outside [A-Za-z0-9_] as a separator, including "ę".
var s3 = "bardzo dziękuję".split(/\W+/);
[0]: "bardzo"
[1]: "dzi"
[2]: "kuj"
How can I fix this problem? Please advise.
You could use CharFunk (https://raw.github.com/joelarson4/CharFunk), which handles Unicode fully.
/**
 * Predicate handed to CharFunk.splitOnMatches: reports whether `ch` is
 * something other than a letter or digit, as judged by
 * CharFunk.isLetterOrDigit.
 * NOTE(review): presumably CharFunk classifies accented letters such as
 * "ę" as letters — confirm against the CharFunk documentation.
 *
 * @param {string} ch - value forwarded to CharFunk.isLetterOrDigit.
 * @returns {boolean} negation of CharFunk.isLetterOrDigit(ch).
 */
function notLetterOrDigit(ch) {
  var isWordChar = CharFunk.isLetterOrDigit(ch);
  return !isWordChar;
}

var s3 = "bardzo dziękuję";

// Split s3 wherever the predicate matches; the result is not stored here.
CharFunk.splitOnMatches(s3, notLetterOrDigit);