fallback.js 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. 'use strict';
  /**
   * Checks if a given buffer contains only correct UTF-8.
   * Ported from https://www.cl.cam.ac.uk/%7Emgk25/ucs/utf8_check.c by
   * Markus Kuhn.
   *
   * The buffer is scanned one encoded sequence at a time. A sequence is
   * rejected when it is truncated, when a continuation byte does not match
   * `10xxxxxx`, when it is an overlong encoding, when it encodes a surrogate
   * (U+D800 - U+DFFF), or when it encodes a value above U+10FFFF. An empty
   * buffer is considered valid.
   *
   * @param {Buffer} buf The buffer to check
   * @return {Boolean} `true` if `buf` contains only correct UTF-8, else `false`
   * @public
   */
  function isValidUTF8(buf) {
    const len = buf.length;
    let i = 0;

    while (i < len) {
      // The lead byte determines how many bytes the sequence must have.
      const lead = buf[i];

      if ((lead & 0x80) === 0x00) {
        // 0xxxxxxx
        i++;
      } else if ((lead & 0xe0) === 0xc0) {
        // 110xxxxx 10xxxxxx
        if (
          i + 1 === len || // truncated
          (buf[i + 1] & 0xc0) !== 0x80 ||
          (lead & 0xfe) === 0xc0 // overlong (0xc0 and 0xc1 lead bytes)
        ) {
          return false;
        }

        i += 2;
      } else if ((lead & 0xf0) === 0xe0) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        if (
          i + 2 >= len || // truncated
          (buf[i + 1] & 0xc0) !== 0x80 ||
          (buf[i + 2] & 0xc0) !== 0x80 ||
          (lead === 0xe0 && (buf[i + 1] & 0xe0) === 0x80) || // overlong
          (lead === 0xed && (buf[i + 1] & 0xe0) === 0xa0) // surrogate (U+D800 - U+DFFF)
        ) {
          return false;
        }

        i += 3;
      } else if ((lead & 0xf8) === 0xf0) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        if (
          i + 3 >= len || // truncated
          (buf[i + 1] & 0xc0) !== 0x80 ||
          (buf[i + 2] & 0xc0) !== 0x80 ||
          (buf[i + 3] & 0xc0) !== 0x80 ||
          (lead === 0xf0 && (buf[i + 1] & 0xf0) === 0x80) || // overlong
          (lead === 0xf4 && buf[i + 1] > 0x8f) ||
          lead > 0xf4 // > U+10FFFF
        ) {
          return false;
        }

        i += 4;
      } else {
        // Stray continuation byte (10xxxxxx) or a lead byte of 0xf8 and above.
        return false;
      }
    }

    return true;
  }
  55. module.exports = isValidUTF8;