|
|
|
|
// SPDX-License-Identifier: LGPL-3.0-or-later
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* \file utf/runetype.c
|
|
|
|
|
*
|
|
|
|
|
* Implements `Rune` classification functions.
|
|
|
|
|
*
|
|
|
|
|
* \copyright The DoubleFourteen Code Forge (C) All Rights Reserved
|
|
|
|
|
* \author Lorenzo Cogotti
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* This work is derived from Plan 9 libutf, see utf/utf.h for details.
|
|
|
|
|
* Original license terms follow:
|
|
|
|
|
*
|
|
|
|
|
* The authors of this software are Rob Pike and Ken Thompson.
|
|
|
|
|
* Copyright (c) 2002 by Lucent Technologies.
|
|
|
|
|
* Permission to use, copy, modify, and distribute this software for any
|
|
|
|
|
* purpose without fee is hereby granted, provided that this entire notice
|
|
|
|
|
* is included in all copies of any software which is or includes a copy
|
|
|
|
|
* or modification of this software and in all copies of the supporting
|
|
|
|
|
* documentation for such software.
|
|
|
|
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
|
|
|
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
|
|
|
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
|
|
|
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
|
|
|
|
*
|
|
|
|
|
* The original libutf library is available at: https://9fans.github.io/plan9port/unix/libutf.tgz
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "utf/utf.h"
|
|
|
|
|
|
|
|
|
|
// Alpha ranges -
|
|
|
|
|
// only covers ranges not in lower||upper
|
|
|
|
|
static const Rune u8_alpha2[] = {
|
|
|
|
|
0x00d8, 0x00f6, /* Ø - ö */
|
|
|
|
|
0x00f8, 0x01f5, /* ø - ǵ */
|
|
|
|
|
0x0250, 0x02a8, /* ɐ - ʨ */
|
|
|
|
|
0x038e, 0x03a1, /* Ύ - Ρ */
|
|
|
|
|
0x03a3, 0x03ce, /* Σ - ώ */
|
|
|
|
|
0x03d0, 0x03d6, /* ϐ - ϖ */
|
|
|
|
|
0x03e2, 0x03f3, /* Ϣ - ϳ */
|
|
|
|
|
0x0490, 0x04c4, /* Ґ - ӄ */
|
|
|
|
|
0x0561, 0x0587, /* ա - և */
|
|
|
|
|
0x05d0, 0x05ea, /* א - ת */
|
|
|
|
|
0x05f0, 0x05f2, /* װ - ײ */
|
|
|
|
|
0x0621, 0x063a, /* ء - غ */
|
|
|
|
|
0x0640, 0x064a, /* ـ - ي */
|
|
|
|
|
0x0671, 0x06b7, /* ٱ - ڷ */
|
|
|
|
|
0x06ba, 0x06be, /* ں - ھ */
|
|
|
|
|
0x06c0, 0x06ce, /* ۀ - ێ */
|
|
|
|
|
0x06d0, 0x06d3, /* ې - ۓ */
|
|
|
|
|
0x0905, 0x0939, /* अ - ह */
|
|
|
|
|
0x0958, 0x0961, /* क़ - ॡ */
|
|
|
|
|
0x0985, 0x098c, /* অ - ঌ */
|
|
|
|
|
0x098f, 0x0990, /* এ - ঐ */
|
|
|
|
|
0x0993, 0x09a8, /* ও - ন */
|
|
|
|
|
0x09aa, 0x09b0, /* প - র */
|
|
|
|
|
0x09b6, 0x09b9, /* শ - হ */
|
|
|
|
|
0x09dc, 0x09dd, /* ড় - ঢ় */
|
|
|
|
|
0x09df, 0x09e1, /* য় - ৡ */
|
|
|
|
|
0x09f0, 0x09f1, /* ৰ - ৱ */
|
|
|
|
|
0x0a05, 0x0a0a, /* ਅ - ਊ */
|
|
|
|
|
0x0a0f, 0x0a10, /* ਏ - ਐ */
|
|
|
|
|
0x0a13, 0x0a28, /* ਓ - ਨ */
|
|
|
|
|
0x0a2a, 0x0a30, /* ਪ - ਰ */
|
|
|
|
|
0x0a32, 0x0a33, /* ਲ - ਲ਼ */
|
|
|
|
|
0x0a35, 0x0a36, /* ਵ - ਸ਼ */
|
|
|
|
|
0x0a38, 0x0a39, /* ਸ - ਹ */
|
|
|
|
|
0x0a59, 0x0a5c, /* ਖ਼ - ੜ */
|
|
|
|
|
0x0a85, 0x0a8b, /* અ - ઋ */
|
|
|
|
|
0x0a8f, 0x0a91, /* એ - ઑ */
|
|
|
|
|
0x0a93, 0x0aa8, /* ઓ - ન */
|
|
|
|
|
0x0aaa, 0x0ab0, /* પ - ર */
|
|
|
|
|
0x0ab2, 0x0ab3, /* લ - ળ */
|
|
|
|
|
0x0ab5, 0x0ab9, /* વ - હ */
|
|
|
|
|
0x0b05, 0x0b0c, /* ଅ - ଌ */
|
|
|
|
|
0x0b0f, 0x0b10, /* ଏ - ଐ */
|
|
|
|
|
0x0b13, 0x0b28, /* ଓ - ନ */
|
|
|
|
|
0x0b2a, 0x0b30, /* ପ - ର */
|
|
|
|
|
0x0b32, 0x0b33, /* ଲ - ଳ */
|
|
|
|
|
0x0b36, 0x0b39, /* ଶ - ହ */
|
|
|
|
|
0x0b5c, 0x0b5d, /* ଡ଼ - ଢ଼ */
|
|
|
|
|
0x0b5f, 0x0b61, /* ୟ - ୡ */
|
|
|
|
|
0x0b85, 0x0b8a, /* அ - ஊ */
|
|
|
|
|
0x0b8e, 0x0b90, /* எ - ஐ */
|
|
|
|
|
0x0b92, 0x0b95, /* ஒ - க */
|
|
|
|
|
0x0b99, 0x0b9a, /* ங - ச */
|
|
|
|
|
0x0b9e, 0x0b9f, /* ஞ - ட */
|
|
|
|
|
0x0ba3, 0x0ba4, /* ண - த */
|
|
|
|
|
0x0ba8, 0x0baa, /* ந - ப */
|
|
|
|
|
0x0bae, 0x0bb5, /* ம - வ */
|
|
|
|
|
0x0bb7, 0x0bb9, /* ஷ - ஹ */
|
|
|
|
|
0x0c05, 0x0c0c, /* అ - ఌ */
|
|
|
|
|
0x0c0e, 0x0c10, /* ఎ - ఐ */
|
|
|
|
|
0x0c12, 0x0c28, /* ఒ - న */
|
|
|
|
|
0x0c2a, 0x0c33, /* ప - ళ */
|
|
|
|
|
0x0c35, 0x0c39, /* వ - హ */
|
|
|
|
|
0x0c60, 0x0c61, /* ౠ - ౡ */
|
|
|
|
|
0x0c85, 0x0c8c, /* ಅ - ಌ */
|
|
|
|
|
0x0c8e, 0x0c90, /* ಎ - ಐ */
|
|
|
|
|
0x0c92, 0x0ca8, /* ಒ - ನ */
|
|
|
|
|
0x0caa, 0x0cb3, /* ಪ - ಳ */
|
|
|
|
|
0x0cb5, 0x0cb9, /* ವ - ಹ */
|
|
|
|
|
0x0ce0, 0x0ce1, /* ೠ - ೡ */
|
|
|
|
|
0x0d05, 0x0d0c, /* അ - ഌ */
|
|
|
|
|
0x0d0e, 0x0d10, /* എ - ഐ */
|
|
|
|
|
0x0d12, 0x0d28, /* ഒ - ന */
|
|
|
|
|
0x0d2a, 0x0d39, /* പ - ഹ */
|
|
|
|
|
0x0d60, 0x0d61, /* ൠ - ൡ */
|
|
|
|
|
0x0e01, 0x0e30, /* ก - ะ */
|
|
|
|
|
0x0e32, 0x0e33, /* า - ำ */
|
|
|
|
|
0x0e40, 0x0e46, /* เ - ๆ */
|
|
|
|
|
0x0e5a, 0x0e5b, /* ๚ - ๛ */
|
|
|
|
|
0x0e81, 0x0e82, /* ກ - ຂ */
|
|
|
|
|
0x0e87, 0x0e88, /* ງ - ຈ */
|
|
|
|
|
0x0e94, 0x0e97, /* ດ - ທ */
|
|
|
|
|
0x0e99, 0x0e9f, /* ນ - ຟ */
|
|
|
|
|
0x0ea1, 0x0ea3, /* ມ - ຣ */
|
|
|
|
|
0x0eaa, 0x0eab, /* ສ - ຫ */
|
|
|
|
|
0x0ead, 0x0eae, /* ອ - ຮ */
|
|
|
|
|
0x0eb2, 0x0eb3, /* າ - ຳ */
|
|
|
|
|
0x0ec0, 0x0ec4, /* ເ - ໄ */
|
|
|
|
|
0x0edc, 0x0edd, /* ໜ - ໝ */
|
|
|
|
|
0x0f18, 0x0f19, /* ༘ - ༙ */
|
|
|
|
|
0x0f40, 0x0f47, /* ཀ - ཇ */
|
|
|
|
|
0x0f49, 0x0f69, /* ཉ - ཀྵ */
|
|
|
|
|
0x10d0, 0x10f6, /* ა - ჶ */
|
|
|
|
|
0x1100, 0x1159, /* ᄀ - ᅙ */
|
|
|
|
|
0x115f, 0x11a2, /* ᅟ - ᆢ */
|
|
|
|
|
0x11a8, 0x11f9, /* ᆨ - ᇹ */
|
|
|
|
|
0x1e00, 0x1e9b, /* Ḁ - ẛ */
|
|
|
|
|
0x1f50, 0x1f57, /* ὐ - ὗ */
|
|
|
|
|
0x1f80, 0x1fb4, /* ᾀ - ᾴ */
|
|
|
|
|
0x1fb6, 0x1fbc, /* ᾶ - ᾼ */
|
|
|
|
|
0x1fc2, 0x1fc4, /* ῂ - ῄ */
|
|
|
|
|
0x1fc6, 0x1fcc, /* ῆ - ῌ */
|
|
|
|
|
0x1fd0, 0x1fd3, /* ῐ - ΐ */
|
|
|
|
|
0x1fd6, 0x1fdb, /* ῖ - Ί */
|
|
|
|
|
0x1fe0, 0x1fec, /* ῠ - Ῥ */
|
|
|
|
|
0x1ff2, 0x1ff4, /* ῲ - ῴ */
|
|
|
|
|
0x1ff6, 0x1ffc, /* ῶ - ῼ */
|
|
|
|
|
0x210a, 0x2113, /* ℊ - ℓ */
|
|
|
|
|
0x2115, 0x211d, /* ℕ - ℝ */
|
|
|
|
|
0x2120, 0x2122, /* ℠ - ™ */
|
|
|
|
|
0x212a, 0x2131, /* K - ℱ */
|
|
|
|
|
0x2133, 0x2138, /* ℳ - ℸ */
|
|
|
|
|
0x3041, 0x3094, /* ぁ - ゔ */
|
|
|
|
|
0x30a1, 0x30fa, /* ァ - ヺ */
|
|
|
|
|
0x3105, 0x312c, /* ㄅ - ㄬ */
|
|
|
|
|
0x3131, 0x318e, /* ㄱ - ㆎ */
|
|
|
|
|
0x3192, 0x319f, /* ㆒ - ㆟ */
|
|
|
|
|
0x3260, 0x327b, /* ㉠ - ㉻ */
|
|
|
|
|
0x328a, 0x32b0, /* ㊊ - ㊰ */
|
|
|
|
|
0x32d0, 0x32fe, /* ㋐ - ㋾ */
|
|
|
|
|
0x3300, 0x3357, /* ㌀ - ㍗ */
|
|
|
|
|
0x3371, 0x3376, /* ㍱ - ㍶ */
|
|
|
|
|
0x337b, 0x3394, /* ㍻ - ㎔ */
|
|
|
|
|
0x3399, 0x339e, /* ㎙ - ㎞ */
|
|
|
|
|
0x33a9, 0x33ad, /* ㎩ - ㎭ */
|
|
|
|
|
0x33b0, 0x33c1, /* ㎰ - ㏁ */
|
|
|
|
|
0x33c3, 0x33c5, /* ㏃ - ㏅ */
|
|
|
|
|
0x33c7, 0x33d7, /* ㏇ - ㏗ */
|
|
|
|
|
0x33d9, 0x33dd, /* ㏙ - ㏝ */
|
|
|
|
|
0x4e00, 0x9fff, /* 一 - 鿿 */
|
|
|
|
|
0xac00, 0xd7a3, /* 가 - 힣 */
|
|
|
|
|
0xf900, 0xfb06, /* 豈 - st */
|
|
|
|
|
0xfb13, 0xfb17, /* ﬓ - ﬗ */
|
|
|
|
|
0xfb1f, 0xfb28, /* ײַ - ﬨ */
|
|
|
|
|
0xfb2a, 0xfb36, /* שׁ - זּ */
|
|
|
|
|
0xfb38, 0xfb3c, /* טּ - לּ */
|
|
|
|
|
0xfb40, 0xfb41, /* נּ - סּ */
|
|
|
|
|
0xfb43, 0xfb44, /* ףּ - פּ */
|
|
|
|
|
0xfb46, 0xfbb1, /* צּ - ﮱ */
|
|
|
|
|
0xfbd3, 0xfd3d, /* ﯓ - ﴽ */
|
|
|
|
|
0xfd50, 0xfd8f, /* ﵐ - ﶏ */
|
|
|
|
|
0xfd92, 0xfdc7, /* ﶒ - ﷇ */
|
|
|
|
|
0xfdf0, 0xfdf9, /* ﷰ - ﷹ */
|
|
|
|
|
0xfe70, 0xfe72, /* ﹰ - ﹲ */
|
|
|
|
|
0xfe76, 0xfefc, /* ﹶ - ﻼ */
|
|
|
|
|
0xff66, 0xff6f, /* ヲ - ッ */
|
|
|
|
|
0xff71, 0xff9d, /* ア - ン */
|
|
|
|
|
0xffa0, 0xffbe, /* ᅠ - ᄒ */
|
|
|
|
|
0xffc2, 0xffc7, /* ᅡ - ᅦ */
|
|
|
|
|
0xffca, 0xffcf, /* ᅧ - ᅬ */
|
|
|
|
|
0xffd2, 0xffd7, /* ᅭ - ᅲ */
|
|
|
|
|
0xffda, 0xffdc /* ᅳ - ᅵ */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Alpha singlets -
|
|
|
|
|
// only covers ranges not in lower||upper
|
|
|
|
|
static const Rune u8_alpha1[] = {
|
|
|
|
|
0x00aa, /* ª */
|
|
|
|
|
0x00b5, /* µ */
|
|
|
|
|
0x00ba, /* º */
|
|
|
|
|
0x03da, /* Ϛ */
|
|
|
|
|
0x03dc, /* Ϝ */
|
|
|
|
|
0x03de, /* Ϟ */
|
|
|
|
|
0x03e0, /* Ϡ */
|
|
|
|
|
0x06d5, /* ە */
|
|
|
|
|
0x09b2, /* ল */
|
|
|
|
|
0x0a5e, /* ਫ਼ */
|
|
|
|
|
0x0a8d, /* ઍ */
|
|
|
|
|
0x0ae0, /* ૠ */
|
|
|
|
|
0x0b9c, /* ஜ */
|
|
|
|
|
0x0cde, /* ೞ */
|
|
|
|
|
0x0e4f, /* ๏ */
|
|
|
|
|
0x0e84, /* ຄ */
|
|
|
|
|
0x0e8a, /* ຊ */
|
|
|
|
|
0x0e8d, /* ຍ */
|
|
|
|
|
0x0ea5, /* ລ */
|
|
|
|
|
0x0ea7, /* ວ */
|
|
|
|
|
0x0eb0, /* ະ */
|
|
|
|
|
0x0ebd, /* ຽ */
|
|
|
|
|
0x1fbe, /* ι */
|
|
|
|
|
0x207f, /* ⁿ */
|
|
|
|
|
0x20a8, /* ₨ */
|
|
|
|
|
0x2102, /* ℂ */
|
|
|
|
|
0x2107, /* ℇ */
|
|
|
|
|
0x2124, /* ℤ */
|
|
|
|
|
0x2126, /* Ω */
|
|
|
|
|
0x2128, /* ℨ */
|
|
|
|
|
0xfb3e, /* מּ */
|
|
|
|
|
0xfe74 /* ﹴ */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Space ranges
|
|
|
|
|
static const Rune u8_space2[] = {
|
|
|
|
|
0x0009, 0x000a, /* tab and newline */
|
|
|
|
|
0x0020, 0x0020, /* space */
|
|
|
|
|
0x00a0, 0x00a0, /* */
|
|
|
|
|
0x2000, 0x200b, /* - */
|
|
|
|
|
0x2028, 0x2029, /*
|
|
|
|
|
- |