// SPDX-License-Identifier: LGPL-3.0-or-later

/**
 * \file utf/utf.h
 *
 * UTF-8 decoding and encoding functionality.
 *
 * \author Russ Cox
 * \author Rob Pike
 * \author Ken Thompson
 * \author Lorenzo Cogotti
 * \copyright The DoubleFourteen Code Forge (C) All Rights Reserved.
 *
 * This API is derived from work authored by Russ Cox - namely the Unix port of the Plan 9
 * UTF-8 library, originally written by Rob Pike and Ken Thompson.
 *
 * Original license terms follow:
 * ```
 * Copyright © 2021 Plan 9 Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 * ```
 * The original libutf library is available at: https://9fans.github.io/plan9port/unix/libutf.tgz
 */

#ifndef DF_UTF_H_
#define DF_UTF_H_

#include "utf/utfdef.h"

/**
 * \brief Convert the first UTF-8 rune inside `\0` terminated string `str` to a `Rune` in `dest`.
 *
 * \return Number of bytes read from `str` for the returned `Rune`.
 *
 * \note The returned byte count is usually equivalent to `runelen()` over the returned `Rune`,
 * but the two values may differ in case of a decoding error. In that case `chartorune()`
 * yields `RUNE_ERR` in `dest` and returns 1. This allows the caller to skip one byte
 * and move on with the decoding.
 */
size_t chartorune(Rune *dest, const char *str);
/// Inverse of `chartorune()`.
size_t runetochar(char *dest, Rune r);
/// Calculate the number of bytes necessary to encode `r`.
size_t runelen(Rune r);
/// Calculate the number of bytes necessary to encode the first `n` runes referenced by `r`.
size_t runenlen(const Rune *r, size_t n);
/// Test whether the first `n` bytes referenced by `src` form at least one `Rune`.
Boolean fullrune(const char *src, size_t n);

/// Convert `r` to lowercase.
Rune tolowerrune(Rune r);
/// Convert `r` to uppercase.
Rune toupperrune(Rune r);
/// Convert `r` to titlecase.
Rune totitlerune(Rune r);
/// Test whether `r` is a lowercase UTF-8 rune.
Boolean islowerrune(Rune r);
/// Test whether `r` is an uppercase UTF-8 rune.
Boolean isupperrune(Rune r);
/// Test whether `r` represents an alphabetic UTF-8 rune.
Boolean isalpharune(Rune r);
/// Test whether `r` is a title-case UTF-8 rune.
Boolean istitlerune(Rune r);
/// Test whether `r` represents a space UTF-8 rune.
Boolean isspacerune(Rune r);

/// Return the number of runes inside the `\0` terminated UTF-8 string `s`.
size_t utflen(const char *s);
/// Find the first occurrence of `r` inside the `\0` terminated UTF-8 string `s`, `NULL` if not found.
char *utfrune(const char *s, Rune r);
/// Find the last occurrence of `r` inside the `\0` terminated UTF-8 string `s`, `NULL` if not found.
char *utfrrune(const char *s, Rune r);
/// Find the first occurrence of the `\0` terminated UTF-8 string `needle` inside `haystack`, `NULL` if not found.
char *utfutf(const char *haystack, const char *needle);

#endif