HTML Tidy
0.1
|
00001 #ifndef __UTF8_H__ 00002 #define __UTF8_H__ 00003 00004 /* utf8.h -- convert characters to/from UTF-8 00005 00006 (c) 1998-2006 (W3C) MIT, ERCIM, Keio University 00007 See tidy.h for the copyright notice. 00008 00009 CVS Info : 00010 00011 $Author: arnaud02 $ 00012 $Date: 2006/09/12 15:14:44 $ 00013 $Revision: 1.5 $ 00014 00015 */ 00016 00017 #include "platform.h" 00018 #include "buffio.h" 00019 00020 /* UTF-8 encoding/decoding support 00021 ** Does not convert character "codepoints", i.e. to/from 10646. 00022 */ 00023 00024 int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes, 00025 TidyInputSource* inp, int* count ); 00026 00027 int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf, 00028 TidyOutputSink* outp, int* count ); 00029 00030 00031 uint TY_(GetUTF8)( ctmbstr str, uint *ch ); 00032 tmbstr TY_(PutUTF8)( tmbstr buf, uint c ); 00033 00034 #define UNICODE_BOM_BE 0xFEFF /* big-endian (default) UNICODE BOM */ 00035 #define UNICODE_BOM UNICODE_BOM_BE 00036 #define UNICODE_BOM_LE 0xFFFE /* little-endian UNICODE BOM */ 00037 #define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */ 00038 00039 00040 Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 ); 00041 Bool TY_(IsHighSurrogate)( tchar ch ); 00042 Bool TY_(IsLowSurrogate)( tchar ch ); 00043 00044 Bool TY_(IsCombinedChar)( tchar ch ); 00045 Bool TY_(IsValidCombinedChar)( tchar ch ); 00046 00047 tchar TY_(CombineSurrogatePair)( tchar high, tchar low ); 00048 Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* high, tchar* low ); 00049 00050 00051 00052 #endif /* __UTF8_H__ */