Version:
~ [ 1.0 ] ~
1 #ifndef __UTF8_H__
2 #define __UTF8_H__
3
4 /* utf8.h -- convert characters to/from UTF-8
5
6 (c) 1998-2004 (W3C) MIT, ERCIM, Keio University
7 See tidy.h for the copyright notice.
8
9 CVS Info :
10
11 $Author: terry_teague $
12 $Date: 2004/08/02 02:32:47 $
13 $Revision: 1.4 $
14
15 */
16
17 #include "platform.h"
18 #include "buffio.h"
19
20 /* UTF-8 encoding/decoding support
21 ** Does not convert character "codepoints", i.e. to/from ISO/IEC 10646.
22 */
23
/* Declaration only: decodes UTF-8 bytes into a character value.
** The definition is not in this header, so the exact contract must be
** read from the implementation file.
** NOTE(review): judging by the signature, the result is presumably written
** through `c`, `firstByte` is the already-read lead byte, the remaining
** bytes come from `successorBytes` or else from `inp`, and `*count`
** reports a byte count -- confirm all of this, and the meaning of the
** int return value, against the definition.
*/
24 int DecodeUTF8BytesToChar( uint* c, uint firstByte, ctmbstr successorBytes,
25 TidyInputSource* inp, int* count );
26
/* Declaration only: encodes the character value `c` as UTF-8 bytes.
** The definition is not in this header, so the exact contract must be
** read from the implementation file.
** NOTE(review): judging by the signature, the bytes presumably go to
** `encodebuf` or else to the sink `outp`, and `*count` reports how many
** bytes were produced -- confirm this, and the meaning of the int return
** value, against the definition.
*/
27 int EncodeCharToUTF8Bytes( uint c, tmbstr encodebuf,
28 TidyOutputSink* outp, int* count );
29
30
/* Declaration only: string-based UTF-8 read taking a constant string
** `str` and an output pointer `ch`.
** NOTE(review): presumably decodes the character at the start of `str`
** into `*ch`; what the uint return value counts is not visible here --
** check the definition before using it to advance a pointer.
*/
31 uint GetUTF8( ctmbstr str, uint *ch );
/* Declaration only: string-based UTF-8 write taking a writable buffer
** `buf` and a character value `c`.
** NOTE(review): presumably stores the UTF-8 encoding of `c` at `buf`;
** which position the returned tmbstr points to, and how much space `buf`
** must provide, are not visible here -- confirm against the definition.
*/
32 tmbstr PutUTF8( tmbstr buf, uint c );
33
/* Byte-order-mark constants.
** UNICODE_BOM is an alias for the big-endian value UNICODE_BOM_BE.
** UNICODE_BOM_LE is the same 16-bit value with its two bytes swapped.
** UNICODE_BOM_UTF8 packs the three bytes EF BB BF into one integer, so it
** does not fit in 16 bits.
*/
34 #define UNICODE_BOM_BE 0xFEFF /* big-endian (default) UNICODE BOM */
35 #define UNICODE_BOM UNICODE_BOM_BE
36 #define UNICODE_BOM_LE 0xFFFE /* little-endian UNICODE BOM */
37 #define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */
38
39
/* Declarations only: Bool-returning predicates on a single tchar value.
** NOTE(review): presumably IsValidUTF16FromUCS4 reports whether a UCS-4
** value can be represented in UTF-16, and IsHighSurrogate/IsLowSurrogate
** test for the UTF-16 surrogate ranges (0xD800-0xDBFF and 0xDC00-0xDFFF)
** -- the exact ranges used are not visible here; confirm in the
** definitions.
*/
40 Bool IsValidUTF16FromUCS4( tchar ucs4 );
41 Bool IsHighSurrogate( tchar ch );
42 Bool IsLowSurrogate( tchar ch );
43
/* Declarations only: Bool-returning predicates on a single tchar value.
** NOTE(review): "combined" presumably refers to a character value formed
** from a surrogate pair (i.e. one above 0xFFFF), with the "Valid" variant
** adding an upper bound -- neither range is visible here; confirm in the
** definitions.
*/
44 Bool IsCombinedChar( tchar ch );
45 Bool IsValidCombinedChar( tchar ch );
46
/* Declarations only: conversions between a surrogate pair and one tchar.
** CombineSurrogatePair takes `high` and `low` and returns a tchar.
** SplitSurrogatePair takes a tchar plus two output pointers and returns
** a Bool.
** NOTE(review): presumably the Bool signals whether the split was
** performed and `*high`/`*low` are written only in that case; how
** CombineSurrogatePair treats arguments that are not surrogates is not
** visible here -- confirm in the definitions.
*/
47 tchar CombineSurrogatePair( tchar high, tchar low );
48 Bool SplitSurrogatePair( tchar utf16, tchar* high, tchar* low );
49
50
51
52 #endif /* __UTF8_H__ */
53
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more
information.