utf8rewind  1.5.2
System library for processing UTF-8 encoded text
Category flags

Flags to be used with utf8iscategory, to check whether code points in a string are part of that category. More...

Macros

#define UTF8_CATEGORY_LETTER_UPPERCASE   0x00000001
 Uppercase letter code points, Lu in the Unicode database. More...
 
#define UTF8_CATEGORY_LETTER_LOWERCASE   0x00000002
 Lowercase letter code points, Ll in the Unicode database. More...
 
#define UTF8_CATEGORY_LETTER_TITLECASE   0x00000004
 Titlecase letter code points, Lt in the Unicode database. More...
 
#define UTF8_CATEGORY_LETTER_MODIFIER   0x00000008
 Modifier letter code points, Lm in the Unicode database. More...
 
#define UTF8_CATEGORY_LETTER_OTHER   0x00000010
 Other letter code points, Lo in the Unicode database. More...
 
#define UTF8_CATEGORY_LETTER
 Combined flag for all letter categories. More...
 
#define UTF8_CATEGORY_CASE_MAPPED
 Combined flag for all letter categories with case mapping. More...
 
#define UTF8_CATEGORY_MARK_NON_SPACING   0x00000020
 Non-spacing mark code points, Mn in the Unicode database. More...
 
#define UTF8_CATEGORY_MARK_SPACING   0x00000040
 Spacing mark code points, Mc in the Unicode database. More...
 
#define UTF8_CATEGORY_MARK_ENCLOSING   0x00000080
 Enclosing mark code points, Me in the Unicode database. More...
 
#define UTF8_CATEGORY_MARK
 Combined flag for all mark categories. More...
 
#define UTF8_CATEGORY_NUMBER_DECIMAL   0x00000100
 Decimal number code points, Nd in the Unicode database. More...
 
#define UTF8_CATEGORY_NUMBER_LETTER   0x00000200
 Letter number code points, Nl in the Unicode database. More...
 
#define UTF8_CATEGORY_NUMBER_OTHER   0x00000400
 Other number code points, No in the Unicode database. More...
 
#define UTF8_CATEGORY_NUMBER
 Combined flag for all number categories. More...
 
#define UTF8_CATEGORY_PUNCTUATION_CONNECTOR   0x00000800
 Connector punctuation category, Pc in the Unicode database. More...
 
#define UTF8_CATEGORY_PUNCTUATION_DASH   0x00001000
 Dash punctuation category, Pd in the Unicode database. More...
 
#define UTF8_CATEGORY_PUNCTUATION_OPEN   0x00002000
 Open punctuation category, Ps in the Unicode database. More...
 
#define UTF8_CATEGORY_PUNCTUATION_CLOSE   0x00004000
 Close punctuation category, Pe in the Unicode database. More...
 
#define UTF8_CATEGORY_PUNCTUATION_INITIAL   0x00008000
 Initial punctuation category, Pi in the Unicode database. More...
 
#define UTF8_CATEGORY_PUNCTUATION_FINAL   0x00010000
 Final punctuation category, Pf in the Unicode database. More...
 
#define UTF8_CATEGORY_PUNCTUATION_OTHER   0x00020000
 Other punctuation category, Po in the Unicode database. More...
 
#define UTF8_CATEGORY_PUNCTUATION
 Combined flag for all punctuation categories. More...
 
#define UTF8_CATEGORY_SYMBOL_MATH   0x00040000
 Math symbol category, Sm in the Unicode database. More...
 
#define UTF8_CATEGORY_SYMBOL_CURRENCY   0x00080000
 Currency symbol category, Sc in the Unicode database. More...
 
#define UTF8_CATEGORY_SYMBOL_MODIFIER   0x00100000
 Modifier symbol category, Sk in the Unicode database. More...
 
#define UTF8_CATEGORY_SYMBOL_OTHER   0x00200000
 Other symbol category, So in the Unicode database. More...
 
#define UTF8_CATEGORY_SYMBOL
 Combined flag for all symbol categories. More...
 
#define UTF8_CATEGORY_SEPARATOR_SPACE   0x00400000
 Space separator category, Zs in the Unicode database. More...
 
#define UTF8_CATEGORY_SEPARATOR_LINE   0x00800000
 Line separator category, Zl in the Unicode database. More...
 
#define UTF8_CATEGORY_SEPARATOR_PARAGRAPH   0x01000000
 Paragraph separator category, Zp in the Unicode database. More...
 
#define UTF8_CATEGORY_SEPARATOR
 Combined flag for all separator categories. More...
 
#define UTF8_CATEGORY_CONTROL   0x02000000
 Control category, Cc in the Unicode database. More...
 
#define UTF8_CATEGORY_FORMAT   0x04000000
 Format category, Cf in the Unicode database. More...
 
#define UTF8_CATEGORY_SURROGATE   0x08000000
 Surrogate category, Cs in the Unicode database. More...
 
#define UTF8_CATEGORY_PRIVATE_USE   0x10000000
 Private use category, Co in the Unicode database. More...
 
#define UTF8_CATEGORY_UNASSIGNED   0x20000000
 Unassigned category, Cn in the Unicode database. More...
 
#define UTF8_CATEGORY_COMPATIBILITY   0x40000000
 Flag used for maintaining backwards compatibility with POSIX functions, not found in the Unicode database. More...
 
#define UTF8_CATEGORY_IGNORE_GRAPHEME_CLUSTER   0x80000000
 Flag used for checking only the general category of code points at the start of a grapheme cluster. More...
 
#define UTF8_CATEGORY_ISCNTRL
 Flag used for maintaining backwards compatibility with POSIX iscntrl function. More...
 
#define UTF8_CATEGORY_ISPRINT
 Flag used for maintaining backwards compatibility with POSIX isprint function. More...
 
#define UTF8_CATEGORY_ISSPACE
 Flag used for maintaining backwards compatibility with POSIX isspace function. More...
 
#define UTF8_CATEGORY_ISBLANK
 Flag used for maintaining backwards compatibility with POSIX isblank function. More...
 
#define UTF8_CATEGORY_ISGRAPH
 Flag used for maintaining backwards compatibility with POSIX isgraph function. More...
 
#define UTF8_CATEGORY_ISPUNCT
 Flag used for maintaining backwards compatibility with POSIX ispunct function. More...
 
#define UTF8_CATEGORY_ISALNUM
 Flag used for maintaining backwards compatibility with POSIX isalnum function. More...
 
#define UTF8_CATEGORY_ISALPHA
 Flag used for maintaining backwards compatibility with POSIX isalpha function. More...
 
#define UTF8_CATEGORY_ISUPPER
 Flag used for maintaining backwards compatibility with POSIX isupper function. More...
 
#define UTF8_CATEGORY_ISLOWER
 Flag used for maintaining backwards compatibility with POSIX islower function. More...
 
#define UTF8_CATEGORY_ISDIGIT
 Flag used for maintaining backwards compatibility with POSIX isdigit function. More...
 
#define UTF8_CATEGORY_ISXDIGIT
 Flag used for maintaining backwards compatibility with POSIX isxdigit function. More...
 

Detailed Description

Flags to be used with utf8iscategory, to check whether code points in a string are part of that category.

Macro Definition Documentation

◆ UTF8_CATEGORY_LETTER_UPPERCASE

#define UTF8_CATEGORY_LETTER_UPPERCASE   0x00000001

Uppercase letter code points, Lu in the Unicode database.

◆ UTF8_CATEGORY_LETTER_LOWERCASE

#define UTF8_CATEGORY_LETTER_LOWERCASE   0x00000002

Lowercase letter code points, Ll in the Unicode database.

◆ UTF8_CATEGORY_LETTER_TITLECASE

#define UTF8_CATEGORY_LETTER_TITLECASE   0x00000004

Titlecase letter code points, Lt in the Unicode database.

◆ UTF8_CATEGORY_LETTER_MODIFIER

#define UTF8_CATEGORY_LETTER_MODIFIER   0x00000008

Modifier letter code points, Lm in the Unicode database.

◆ UTF8_CATEGORY_LETTER_OTHER

#define UTF8_CATEGORY_LETTER_OTHER   0x00000010

Other letter code points, Lo in the Unicode database.

◆ UTF8_CATEGORY_LETTER

#define UTF8_CATEGORY_LETTER
Value:
(UTF8_CATEGORY_LETTER_UPPERCASE | UTF8_CATEGORY_LETTER_LOWERCASE | \
UTF8_CATEGORY_LETTER_TITLECASE | UTF8_CATEGORY_LETTER_MODIFIER | \
UTF8_CATEGORY_LETTER_OTHER)

Combined flag for all letter categories.

◆ UTF8_CATEGORY_CASE_MAPPED

#define UTF8_CATEGORY_CASE_MAPPED
Value:
(UTF8_CATEGORY_LETTER_UPPERCASE | UTF8_CATEGORY_LETTER_LOWERCASE | \
UTF8_CATEGORY_LETTER_TITLECASE)

Combined flag for all letter categories with case mapping.

◆ UTF8_CATEGORY_MARK_NON_SPACING

#define UTF8_CATEGORY_MARK_NON_SPACING   0x00000020

Non-spacing mark code points, Mn in the Unicode database.

◆ UTF8_CATEGORY_MARK_SPACING

#define UTF8_CATEGORY_MARK_SPACING   0x00000040

Spacing mark code points, Mc in the Unicode database.

◆ UTF8_CATEGORY_MARK_ENCLOSING

#define UTF8_CATEGORY_MARK_ENCLOSING   0x00000080

Enclosing mark code points, Me in the Unicode database.

◆ UTF8_CATEGORY_MARK

#define UTF8_CATEGORY_MARK
Value:
(UTF8_CATEGORY_MARK_NON_SPACING | UTF8_CATEGORY_MARK_SPACING | \
UTF8_CATEGORY_MARK_ENCLOSING)

Combined flag for all mark categories.

◆ UTF8_CATEGORY_NUMBER_DECIMAL

#define UTF8_CATEGORY_NUMBER_DECIMAL   0x00000100

Decimal number code points, Nd in the Unicode database.

◆ UTF8_CATEGORY_NUMBER_LETTER

#define UTF8_CATEGORY_NUMBER_LETTER   0x00000200

Letter number code points, Nl in the Unicode database.

◆ UTF8_CATEGORY_NUMBER_OTHER

#define UTF8_CATEGORY_NUMBER_OTHER   0x00000400

Other number code points, No in the Unicode database.

◆ UTF8_CATEGORY_NUMBER

#define UTF8_CATEGORY_NUMBER
Value:
(UTF8_CATEGORY_NUMBER_DECIMAL | UTF8_CATEGORY_NUMBER_LETTER | \
UTF8_CATEGORY_NUMBER_OTHER)

Combined flag for all number categories.

◆ UTF8_CATEGORY_PUNCTUATION_CONNECTOR

#define UTF8_CATEGORY_PUNCTUATION_CONNECTOR   0x00000800

Connector punctuation category, Pc in the Unicode database.

◆ UTF8_CATEGORY_PUNCTUATION_DASH

#define UTF8_CATEGORY_PUNCTUATION_DASH   0x00001000

Dash punctuation category, Pd in the Unicode database.

◆ UTF8_CATEGORY_PUNCTUATION_OPEN

#define UTF8_CATEGORY_PUNCTUATION_OPEN   0x00002000

Open punctuation category, Ps in the Unicode database.

◆ UTF8_CATEGORY_PUNCTUATION_CLOSE

#define UTF8_CATEGORY_PUNCTUATION_CLOSE   0x00004000

Close punctuation category, Pe in the Unicode database.

◆ UTF8_CATEGORY_PUNCTUATION_INITIAL

#define UTF8_CATEGORY_PUNCTUATION_INITIAL   0x00008000

Initial punctuation category, Pi in the Unicode database.

◆ UTF8_CATEGORY_PUNCTUATION_FINAL

#define UTF8_CATEGORY_PUNCTUATION_FINAL   0x00010000

Final punctuation category, Pf in the Unicode database.

◆ UTF8_CATEGORY_PUNCTUATION_OTHER

#define UTF8_CATEGORY_PUNCTUATION_OTHER   0x00020000

Other punctuation category, Po in the Unicode database.

◆ UTF8_CATEGORY_PUNCTUATION

#define UTF8_CATEGORY_PUNCTUATION
Value:
(UTF8_CATEGORY_PUNCTUATION_CONNECTOR | UTF8_CATEGORY_PUNCTUATION_DASH | \
UTF8_CATEGORY_PUNCTUATION_OPEN | UTF8_CATEGORY_PUNCTUATION_CLOSE | \
UTF8_CATEGORY_PUNCTUATION_INITIAL | UTF8_CATEGORY_PUNCTUATION_FINAL | \
UTF8_CATEGORY_PUNCTUATION_OTHER)

Combined flag for all punctuation categories.

◆ UTF8_CATEGORY_SYMBOL_MATH

#define UTF8_CATEGORY_SYMBOL_MATH   0x00040000

Math symbol category, Sm in the Unicode database.

◆ UTF8_CATEGORY_SYMBOL_CURRENCY

#define UTF8_CATEGORY_SYMBOL_CURRENCY   0x00080000

Currency symbol category, Sc in the Unicode database.

◆ UTF8_CATEGORY_SYMBOL_MODIFIER

#define UTF8_CATEGORY_SYMBOL_MODIFIER   0x00100000

Modifier symbol category, Sk in the Unicode database.

◆ UTF8_CATEGORY_SYMBOL_OTHER

#define UTF8_CATEGORY_SYMBOL_OTHER   0x00200000

Other symbol category, So in the Unicode database.

◆ UTF8_CATEGORY_SYMBOL

#define UTF8_CATEGORY_SYMBOL
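Value:
(UTF8_CATEGORY_SYMBOL_MATH | UTF8_CATEGORY_SYMBOL_CURRENCY | \
UTF8_CATEGORY_SYMBOL_MODIFIER | UTF8_CATEGORY_SYMBOL_OTHER)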

Combined flag for all symbol categories.

◆ UTF8_CATEGORY_SEPARATOR_SPACE

#define UTF8_CATEGORY_SEPARATOR_SPACE   0x00400000

Space separator category, Zs in the Unicode database.

◆ UTF8_CATEGORY_SEPARATOR_LINE

#define UTF8_CATEGORY_SEPARATOR_LINE   0x00800000

Line separator category, Zl in the Unicode database.

◆ UTF8_CATEGORY_SEPARATOR_PARAGRAPH

#define UTF8_CATEGORY_SEPARATOR_PARAGRAPH   0x01000000

Paragraph separator category, Zp in the Unicode database.

◆ UTF8_CATEGORY_SEPARATOR

#define UTF8_CATEGORY_SEPARATOR
Value:
(UTF8_CATEGORY_SEPARATOR_SPACE | UTF8_CATEGORY_SEPARATOR_LINE | \
UTF8_CATEGORY_SEPARATOR_PARAGRAPH)

Combined flag for all separator categories.

◆ UTF8_CATEGORY_CONTROL

#define UTF8_CATEGORY_CONTROL   0x02000000

Control category, Cc in the Unicode database.

◆ UTF8_CATEGORY_FORMAT

#define UTF8_CATEGORY_FORMAT   0x04000000

Format category, Cf in the Unicode database.

◆ UTF8_CATEGORY_SURROGATE

#define UTF8_CATEGORY_SURROGATE   0x08000000

Surrogate category, Cs in the Unicode database.

◆ UTF8_CATEGORY_PRIVATE_USE

#define UTF8_CATEGORY_PRIVATE_USE   0x10000000

Private use category, Co in the Unicode database.

◆ UTF8_CATEGORY_UNASSIGNED

#define UTF8_CATEGORY_UNASSIGNED   0x20000000

Unassigned category, Cn in the Unicode database.

◆ UTF8_CATEGORY_COMPATIBILITY

#define UTF8_CATEGORY_COMPATIBILITY   0x40000000

Flag used for maintaining backwards compatibility with POSIX functions, not found in the Unicode database.

◆ UTF8_CATEGORY_IGNORE_GRAPHEME_CLUSTER

#define UTF8_CATEGORY_IGNORE_GRAPHEME_CLUSTER   0x80000000

Flag used for checking only the general category of code points at the start of a grapheme cluster.

◆ UTF8_CATEGORY_ISCNTRL

#define UTF8_CATEGORY_ISCNTRL
Value:
(UTF8_CATEGORY_COMPATIBILITY | \
UTF8_CATEGORY_CONTROL)

Flag used for maintaining backwards compatibility with POSIX iscntrl function.

◆ UTF8_CATEGORY_ISPRINT

#define UTF8_CATEGORY_ISPRINT
Value:
(UTF8_CATEGORY_COMPATIBILITY | \
UTF8_CATEGORY_LETTER | UTF8_CATEGORY_NUMBER | \
UTF8_CATEGORY_PUNCTUATION | UTF8_CATEGORY_SYMBOL | \
UTF8_CATEGORY_SEPARATOR)

Flag used for maintaining backwards compatibility with POSIX isprint function.

◆ UTF8_CATEGORY_ISSPACE

#define UTF8_CATEGORY_ISSPACE
Value:
(UTF8_CATEGORY_COMPATIBILITY | \
UTF8_CATEGORY_SEPARATOR_SPACE)

Flag used for maintaining backwards compatibility with POSIX isspace function.

◆ UTF8_CATEGORY_ISBLANK

#define UTF8_CATEGORY_ISBLANK
Value:
(UTF8_CATEGORY_COMPATIBILITY | \
UTF8_CATEGORY_SEPARATOR_SPACE | UTF8_CATEGORY_PRIVATE_USE)

Flag used for maintaining backwards compatibility with POSIX isblank function.

◆ UTF8_CATEGORY_ISGRAPH

#define UTF8_CATEGORY_ISGRAPH
Value:
(UTF8_CATEGORY_COMPATIBILITY | \
UTF8_CATEGORY_LETTER | UTF8_CATEGORY_NUMBER | \
UTF8_CATEGORY_PUNCTUATION | UTF8_CATEGORY_SYMBOL)

Flag used for maintaining backwards compatibility with POSIX isgraph function.

◆ UTF8_CATEGORY_ISPUNCT

#define UTF8_CATEGORY_ISPUNCT
Value:
(UTF8_CATEGORY_COMPATIBILITY | \
UTF8_CATEGORY_PUNCTUATION | UTF8_CATEGORY_SYMBOL)

Flag used for maintaining backwards compatibility with POSIX ispunct function.

◆ UTF8_CATEGORY_ISALNUM

#define UTF8_CATEGORY_ISALNUM
Value:
(UTF8_CATEGORY_COMPATIBILITY | \
UTF8_CATEGORY_LETTER | UTF8_CATEGORY_NUMBER)

Flag used for maintaining backwards compatibility with POSIX isalnum function.

◆ UTF8_CATEGORY_ISALPHA

#define UTF8_CATEGORY_ISALPHA
Value:
(UTF8_CATEGORY_COMPATIBILITY | \
UTF8_CATEGORY_LETTER)

Flag used for maintaining backwards compatibility with POSIX isalpha function.

◆ UTF8_CATEGORY_ISUPPER

#define UTF8_CATEGORY_ISUPPER
Value:
(UTF8_CATEGORY_COMPATIBILITY | \
UTF8_CATEGORY_LETTER_UPPERCASE)

Flag used for maintaining backwards compatibility with POSIX isupper function.

◆ UTF8_CATEGORY_ISLOWER

#define UTF8_CATEGORY_ISLOWER
Value:
(UTF8_CATEGORY_COMPATIBILITY | \
UTF8_CATEGORY_LETTER_LOWERCASE)

Flag used for maintaining backwards compatibility with POSIX islower function.

◆ UTF8_CATEGORY_ISDIGIT

#define UTF8_CATEGORY_ISDIGIT
Value:
(UTF8_CATEGORY_COMPATIBILITY | \
UTF8_CATEGORY_NUMBER)

Flag used for maintaining backwards compatibility with POSIX isdigit function.

◆ UTF8_CATEGORY_ISXDIGIT

#define UTF8_CATEGORY_ISXDIGIT
Value:
(UTF8_CATEGORY_COMPATIBILITY | \
UTF8_CATEGORY_NUMBER | UTF8_CATEGORY_PRIVATE_USE)

Flag used for maintaining backwards compatibility with POSIX isxdigit function.

UTF8_CATEGORY_SYMBOL_OTHER
#define UTF8_CATEGORY_SYMBOL_OTHER
Other symbol category, So in the Unicode database.
Definition: utf8rewind.h:432
UTF8_CATEGORY_MARK_SPACING
#define UTF8_CATEGORY_MARK_SPACING
Spacing mark code points, Mc in the Unicode database.
Definition: utf8rewind.h:316
UTF8_CATEGORY_SEPARATOR_SPACE
#define UTF8_CATEGORY_SEPARATOR_SPACE
Space separator category, Zs in the Unicode database.
Definition: utf8rewind.h:446
UTF8_CATEGORY_SYMBOL_MATH
#define UTF8_CATEGORY_SYMBOL_MATH
Math symbol category, Sm in the Unicode database.
Definition: utf8rewind.h:414
UTF8_CATEGORY_PUNCTUATION_FINAL
#define UTF8_CATEGORY_PUNCTUATION_FINAL
Final punctuation category, Pf in the Unicode database.
Definition: utf8rewind.h:392
UTF8_CATEGORY_LETTER_MODIFIER
#define UTF8_CATEGORY_LETTER_MODIFIER
Modifier letter code points, Lm in the Unicode database.
Definition: utf8rewind.h:281
UTF8_CATEGORY_PUNCTUATION_CONNECTOR
#define UTF8_CATEGORY_PUNCTUATION_CONNECTOR
Connector punctuation category, Pc in the Unicode database.
Definition: utf8rewind.h:362
UTF8_CATEGORY_LETTER_LOWERCASE
#define UTF8_CATEGORY_LETTER_LOWERCASE
Lowercase letter code points, Ll in the Unicode database.
Definition: utf8rewind.h:269
UTF8_CATEGORY_NUMBER_LETTER
#define UTF8_CATEGORY_NUMBER_LETTER
Letter number code points, Nl in the Unicode database.
Definition: utf8rewind.h:342
UTF8_CATEGORY_MARK_NON_SPACING
#define UTF8_CATEGORY_MARK_NON_SPACING
Non-spacing mark code points, Mn in the Unicode database.
Definition: utf8rewind.h:310
UTF8_CATEGORY_PUNCTUATION_CLOSE
#define UTF8_CATEGORY_PUNCTUATION_CLOSE
Close punctuation category, Pe in the Unicode database.
Definition: utf8rewind.h:380
UTF8_CATEGORY_COMPATIBILITY
#define UTF8_CATEGORY_COMPATIBILITY
Flag used for maintaining backwards compatibility with POSIX functions, not found in the Unicode database.
Definition: utf8rewind.h:503
UTF8_CATEGORY_PRIVATE_USE
#define UTF8_CATEGORY_PRIVATE_USE
Private use category, Co in the Unicode database.
Definition: utf8rewind.h:490
UTF8_CATEGORY_SYMBOL
#define UTF8_CATEGORY_SYMBOL
Combined flag for all symbol categories.
Definition: utf8rewind.h:438
UTF8_CATEGORY_NUMBER
#define UTF8_CATEGORY_NUMBER
Combined flag for all number categories.
Definition: utf8rewind.h:354
UTF8_CATEGORY_LETTER_UPPERCASE
#define UTF8_CATEGORY_LETTER_UPPERCASE
Uppercase letter code points, Lu in the Unicode database.
Definition: utf8rewind.h:263
UTF8_CATEGORY_NUMBER_DECIMAL
#define UTF8_CATEGORY_NUMBER_DECIMAL
Decimal number code points, Nd in the Unicode database.
Definition: utf8rewind.h:336
UTF8_CATEGORY_SYMBOL_CURRENCY
#define UTF8_CATEGORY_SYMBOL_CURRENCY
Currency symbol category, Sc in the Unicode database.
Definition: utf8rewind.h:420
UTF8_CATEGORY_PUNCTUATION_DASH
#define UTF8_CATEGORY_PUNCTUATION_DASH
Dash punctuation category, Pd in the Unicode database.
Definition: utf8rewind.h:368
UTF8_CATEGORY_SEPARATOR_LINE
#define UTF8_CATEGORY_SEPARATOR_LINE
Line separator category, Zl in the Unicode database.
Definition: utf8rewind.h:452