You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

93 lines
3.8 KiB
C

// SPDX-License-Identifier: LGPL-3.0-or-later
/**
* \file utf/utf.h
*
* UTF-8 decoding and encoding functionality.
*
* \author Russ Cox
* \author Rob Pike
* \author Ken Thompson
* \author Lorenzo Cogotti
* \copyright The DoubleFourteen Code Forge (C) All Rights Reserved.
*
* This API is derived from work authored by Russ Cox - namely the Unix port of the Plan 9
* UTF-8 library, originally written by Rob Pike and Ken Thompson.
*
* Original license terms follow:
* ```
* Copyright © 2021 Plan 9 Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* ```
* The original libutf library is available at: https://9fans.github.io/plan9port/unix/libutf.tgz
*/
#ifndef DF_UTF_H_
#define DF_UTF_H_
#include "utf/utfdef.h"
/**
* \brief Convert the first UTF-8 rune inside `\0` terminated string `str` to a `Rune` in `dest`.
*
* \param [out] dest Storage receiving the decoded `Rune`.
* \param [in]  str  `\0` terminated string to decode from.
*
* \return Number of bytes read from `str` for the returned `Rune`.
*
* \note The returned byte count is usually equal to `runelen()` over the returned `Rune`,
* but the two may differ in case of a decoding error. In that case `chartorune()` stores
* `RUNE_ERR` in `dest` and returns 1. This allows the caller to skip one byte and move on
* with the decoding.
*/
size_t chartorune(Rune *dest, const char *str);
/// Inverse of `chartorune()`: encode the rune `r` into the buffer `dest`.
///
/// NOTE(review): presumably returns the number of bytes written to `dest`, and
/// `dest` must be large enough for the longest encoding — confirm against the implementation.
size_t runetochar(char *dest, Rune r);
/// Calculate the number of bytes necessary to encode `r`.
///
/// \param r Rune whose encoded length is requested.
size_t runelen(Rune r);
/// Calculate the number of bytes necessary to encode the first `n` runes referenced by `r`.
///
/// \param r Pointer to the runes to be measured.
/// \param n Number of runes to take into account, starting from `r`.
size_t runenlen(const Rune *r, size_t n);
/// Test whether the first `n` bytes referenced by `src` form at least one `Rune`.
///
/// \param src Bytes to be inspected.
/// \param n   Number of bytes available in `src`.
Boolean fullrune(const char *src, size_t n);
/// Convert `r` to lowercase.
///
/// \param r Rune to convert.
/// \return The lowercase rune.
/// NOTE(review): presumably `r` itself when no lowercase mapping exists — confirm.
Rune tolowerrune(Rune r);
/// Convert `r` to uppercase.
///
/// \param r Rune to convert.
/// \return The uppercase rune.
/// NOTE(review): presumably `r` itself when no uppercase mapping exists — confirm.
Rune toupperrune(Rune r);
/// Convert `r` to titlecase.
///
/// \param r Rune to convert.
/// \return The titlecase rune.
/// NOTE(review): presumably `r` itself when no titlecase mapping exists — confirm.
Rune totitlerune(Rune r);
/// Test whether `r` is a lowercase UTF-8 rune.
///
/// \param r Rune to test.
Boolean islowerrune(Rune r);
/// Test whether `r` is an uppercase UTF-8 rune.
///
/// \param r Rune to test.
Boolean isupperrune(Rune r);
/// Test whether `r` represents an alphabetic UTF-8 rune.
///
/// \param r Rune to test.
Boolean isalpharune(Rune r);
/// Test whether `r` is a title-case UTF-8 rune.
///
/// \param r Rune to test.
Boolean istitlerune(Rune r);
/// Test whether `r` represents a space UTF-8 rune.
///
/// \param r Rune to test.
Boolean isspacerune(Rune r);
/// Return the number of runes inside the `\0` terminated UTF-8 string `s`.
///
/// \param s `\0` terminated UTF-8 string to be measured.
///
/// \note The result counts runes, not bytes.
size_t utflen(const char *s);
/// Find the first occurrence of `r` inside the `\0` terminated UTF-8 string `s`, `NULL` if not found.
///
/// \param s `\0` terminated UTF-8 string to search.
/// \param r Rune to look for.
///
/// NOTE(review): the result is a non-`const` pointer although `s` is `const`;
/// presumably it points inside `s` (like `strchr()`) — confirm before writing through it.
char *utfrune(const char *s, Rune r);
/// Find the last occurrence of `r` inside the `\0` terminated UTF-8 string `s`, `NULL` if not found.
///
/// \param s `\0` terminated UTF-8 string to search.
/// \param r Rune to look for.
///
/// NOTE(review): the result is a non-`const` pointer although `s` is `const`;
/// presumably it points inside `s` (like `strrchr()`) — confirm before writing through it.
char *utfrrune(const char *s, Rune r);
/// Find the first occurrence of the `\0` terminated UTF-8 string `needle` inside `haystack`, `NULL` if not found.
///
/// \param haystack String to be searched.
/// \param needle   `\0` terminated UTF-8 string to look for.
///
/// NOTE(review): `haystack` is presumably a `\0` terminated UTF-8 string as well, and the
/// result presumably points inside it (like `strstr()`) — confirm against the implementation.
char *utfutf(const char *haystack, const char *needle);
#endif