third_party/llvm-16.0/llvm/include/llvm/Support/Unicode.h - SwiftShader - Git at Google

 //===- llvm/Support/Unicode.h - Unicode character properties  -*- C++ -*-=====//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file defines functions that allow querying certain properties of Unicode
 // characters.
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_SUPPORT_UNICODE_H
 #define LLVM_SUPPORT_UNICODE_H

 #include "llvm/ADT/SmallString.h"
 #include <optional>
 #include <string>

 namespace llvm {
 class StringRef;

 namespace sys {
 namespace unicode {

 enum ColumnWidthErrors {
   ErrorInvalidUTF8 = -2,
   ErrorNonPrintableCharacter = -1
 };

 /// Determines if a character is likely to be displayed correctly on the
 /// terminal. Exact implementation would have to depend on the specific
 /// terminal, so we define the semantic that should be suitable for generic case
 /// of a terminal capable to output Unicode characters.
 ///
 /// Printable codepoints are those in the categories L, M, N, P, S and Zs
 /// \return true if the character is considered printable.
 bool isPrintable(int UCS);

 // Formatting codepoints are codepoints in the Cf category.
 bool isFormatting(int UCS);

 /// Gets the number of positions the UTF8-encoded \p Text is likely to occupy
 /// when output on a terminal ("character width"). This depends on the
 /// implementation of the terminal, and there's no standard definition of
 /// character width.
 ///
 /// The implementation defines it in a way that is expected to be compatible
 /// with a generic Unicode-capable terminal.
 ///
 /// \return Character width:
 ///   * ErrorNonPrintableCharacter (-1) if \p Text contains non-printable
 ///     characters (as identified by isPrintable);
 ///   * 0 for each non-spacing and enclosing combining mark;
 ///   * 2 for each CJK character excluding halfwidth forms;
 ///   * 1 for each of the remaining characters.
 int columnWidthUTF8(StringRef Text);

 /// Fold input unicode character according the Simple unicode case folding
 /// rules.
 int foldCharSimple(int C);

 /// Maps the name or the alias of a Unicode character to its associated
 /// codepoints.
 /// The names and aliases are derived from UnicodeData.txt and NameAliases.txt
 /// For compatibility with the semantics of named character escape sequences in
 /// C++, this mapping does an exact match sensitive to casing and spacing.
 /// \return The codepoint of the corresponding character, if any.
 std::optional<char32_t> nameToCodepointStrict(StringRef Name);

 struct LooseMatchingResult {
   char32_t CodePoint;
   SmallString<64> Name;
 };

 std::optional<LooseMatchingResult> nameToCodepointLooseMatching(StringRef Name);

 struct MatchForCodepointName {
   std::string Name;
   uint32_t Distance = 0;
   char32_t Value = 0;
 };

 SmallVector<MatchForCodepointName>
 nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount);

 } // namespace unicode
 } // namespace sys
 } // namespace llvm

 #endif
	//===- llvm/Support/Unicode.h - Unicode character properties -- C++ --=====//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This file defines functions that allow querying certain properties of Unicode
	// characters.
	//
	//===----------------------------------------------------------------------===//

	#ifndef LLVM_SUPPORT_UNICODE_H
	#define LLVM_SUPPORT_UNICODE_H

	#include "llvm/ADT/SmallString.h"
	#include <optional>
	#include <string>

	namespace llvm {
	class StringRef;

	namespace sys {
	namespace unicode {

	enum ColumnWidthErrors {
	ErrorInvalidUTF8 = -2,
	ErrorNonPrintableCharacter = -1
	};

	/// Determines if a character is likely to be displayed correctly on the
	/// terminal. Exact implementation would have to depend on the specific
	/// terminal, so we define the semantic that should be suitable for generic case
	/// of a terminal capable to output Unicode characters.
	///
	/// Printable codepoints are those in the categories L, M, N, P, S and Zs
	/// \return true if the character is considered printable.
	bool isPrintable(int UCS);

	// Formatting codepoints are codepoints in the Cf category.
	bool isFormatting(int UCS);

	/// Gets the number of positions the UTF8-encoded \p Text is likely to occupy
	/// when output on a terminal ("character width"). This depends on the
	/// implementation of the terminal, and there's no standard definition of
	/// character width.
	///
	/// The implementation defines it in a way that is expected to be compatible
	/// with a generic Unicode-capable terminal.
	///
	/// \return Character width:
	/// * ErrorNonPrintableCharacter (-1) if \p Text contains non-printable
	/// characters (as identified by isPrintable);
	/// * 0 for each non-spacing and enclosing combining mark;
	/// * 2 for each CJK character excluding halfwidth forms;
	/// * 1 for each of the remaining characters.
	int columnWidthUTF8(StringRef Text);

	/// Fold input unicode character according the Simple unicode case folding
	/// rules.
	int foldCharSimple(int C);

	/// Maps the name or the alias of a Unicode character to its associated
	/// codepoints.
	/// The names and aliases are derived from UnicodeData.txt and NameAliases.txt
	/// For compatibility with the semantics of named character escape sequences in
	/// C++, this mapping does an exact match sensitive to casing and spacing.
	/// \return The codepoint of the corresponding character, if any.
	std::optional<char32_t> nameToCodepointStrict(StringRef Name);

	struct LooseMatchingResult {
	char32_t CodePoint;
	SmallString<64> Name;
	};

	std::optional<LooseMatchingResult> nameToCodepointLooseMatching(StringRef Name);

	struct MatchForCodepointName {
	std::string Name;
	uint32_t Distance = 0;
	char32_t Value = 0;
	};

	SmallVector<MatchForCodepointName>
	nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount);

	} // namespace unicode
	} // namespace sys
	} // namespace llvm

	#endif