~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

TidyLib
tidy/src/streamio.h

Version: ~ [ 1.0 ] ~

** Warning: Cannot open xref database.

1 #ifndef __STREAMIO_H__ 2 #define __STREAMIO_H__ 3 4 /* streamio.h -- handles character stream I/O 5 6 (c) 1998-2005 (W3C) MIT, ERCIM, Keio University 7 See tidy.h for the copyright notice. 8 9 CVS Info : 10 11 $Author: arnaud02 $ 12 $Date: 2005/03/03 12:49:24 $ 13 $Revision: 1.14 $ 14 15 Wrapper around Tidy input source and output sink 16 that calls appropriate interfaces, and applies 17 necessary char encoding transformations: to/from 18 ISO-10646 and/or UTF-8. 19 20 */ 21 22 #include "forward.h" 23 #include "buffio.h" 24 #include "fileio.h" 25 26 #ifdef __cplusplus 27 extern "C" 28 { 29 #endif 30 typedef enum 31 { 32 FileIO, 33 BufferIO, 34 UserIO 35 } IOType; 36 37 /* states for ISO 2022 38 39 A document in ISO-2022 based encoding uses some ESC sequences called 40 "designator" to switch character sets. The designators defined and 41 used in ISO-2022-JP are: 42 43 "ESC" + "(" + ? for ISO646 variants 44 45 "ESC" + "$" + ? and 46 "ESC" + "$" + "(" + ? for multibyte character sets 47 */ 48 typedef enum 49 { 50 FSM_ASCII, 51 FSM_ESC, 52 FSM_ESCD, 53 FSM_ESCDP, 54 FSM_ESCP, 55 FSM_NONASCII 56 } ISO2022State; 57 58 /************************ 59 ** Source 60 ************************/ 61 62 #define CHARBUF_SIZE 5 63 64 /* non-raw input is cleaned up*/ 65 struct _StreamIn 66 { 67 ISO2022State state; /* FSM for ISO2022 */ 68 Bool pushed; 69 tchar* charbuf; 70 uint bufpos; 71 uint bufsize; 72 int tabs; 73 int lastcol; 74 int curcol; 75 int curline; 76 int encoding; 77 IOType iotype; 78 79 TidyInputSource source; 80 81 #ifdef TIDY_WIN32_MLANG_SUPPORT 82 ulong mlang; 83 #endif 84 85 #ifdef TIDY_STORE_ORIGINAL_TEXT 86 tmbstr otextbuf; 87 size_t otextsize; 88 uint otextlen; 89 #endif 90 91 /* Pointer back to document for error reporting */ 92 TidyDocImpl* doc; 93 }; 94 95 void freeStreamIn(StreamIn* in); 96 97 StreamIn* FileInput( TidyDocImpl* doc, FILE* fp, int encoding ); 98 StreamIn* BufferInput( TidyDocImpl* doc, TidyBuffer* content, int encoding ); 99 StreamIn* UserInput( TidyDocImpl* doc, TidyInputSource* source, int encoding ); 100 101 int ReadBOMEncoding(StreamIn *in); 102 uint ReadChar( StreamIn* in ); 103 void UngetChar( uint c, StreamIn* in ); 104 uint PopChar( StreamIn *in ); 105 Bool IsEOF( StreamIn* in ); 106 107 108 /************************ 109 ** Sink 110 ************************/ 111 112 struct _StreamOut 113 { 114 int encoding; 115 ISO2022State state; /* for ISO 2022 */ 116 uint nl; 117 118 #ifdef TIDY_WIN32_MLANG_SUPPORT 119 ulong mlang; 120 #endif 121 122 IOType iotype; 123 TidyOutputSink sink; 124 }; 125 126 StreamOut* FileOutput( FILE* fp, int encoding, uint newln ); 127 StreamOut* BufferOutput( TidyBuffer* buf, int encoding, uint newln ); 128 StreamOut* UserOutput( TidyOutputSink* sink, int encoding, uint newln ); 129 130 StreamOut* StdErrOutput(void); 131 StreamOut* StdOutOutput(void); 132 void ReleaseStreamOut( StreamOut* out ); 133 134 void WriteChar( uint c, StreamOut* out ); 135 void outBOM( StreamOut *out ); 136 137 ctmbstr GetEncodingNameFromTidyId(uint id); 138 ctmbstr GetEncodingOptNameFromTidyId(uint id); 139 int GetCharEncodingFromOptName(ctmbstr charenc); 140 141 /************************ 142 ** Misc 143 ************************/ 144 145 /* character encodings 146 */ 147 #define RAW 0 148 #define ASCII 1 149 #define LATIN0 2 150 #define LATIN1 3 151 #define UTF8 4 152 #define ISO2022 5 153 #define MACROMAN 6 154 #define WIN1252 7 155 #define IBM858 8 156 157 #if SUPPORT_UTF16_ENCODINGS 158 #define UTF16LE 9 159 #define UTF16BE 10 160 #define UTF16 11 161 #endif 162 163 /* Note that Big5 and SHIFTJIS are not converted to ISO 10646 codepoints 164 ** (i.e., to Unicode) before being recoded into UTF-8. This may be 165 ** confusing: usually UTF-8 implies ISO10646 codepoints. 166 */ 167 #if SUPPORT_ASIAN_ENCODINGS 168 #if SUPPORT_UTF16_ENCODINGS 169 #define BIG5 12 170 #define SHIFTJIS 13 171 #else 172 #define BIG5 9 173 #define SHIFTJIS 10 174 #endif 175 #endif 176 177 #ifdef TIDY_WIN32_MLANG_SUPPORT 178 /* hack: windows code page numbers start at 37 */ 179 #define WIN32MLANG 36 180 #endif 181 182 183 /* char encoding used when replacing illegal SGML chars, 184 ** regardless of specified encoding. Set at compile time 185 ** to either Windows or Mac. 186 */ 187 extern const int ReplacementCharEncoding; 188 189 /* Function for conversion from Windows-1252 to Unicode */ 190 uint DecodeWin1252(uint c); 191 192 /* Function to convert from MacRoman to Unicode */ 193 uint DecodeMacRoman(uint c); 194 195 /* Function for conversion from OS/2-850 to Unicode */ 196 uint DecodeIbm850(uint c); 197 198 /* Function for conversion from Latin0 to Unicode */ 199 uint DecodeLatin0(uint c); 200 201 /* Function to convert from Symbol Font chars to Unicode */ 202 uint DecodeSymbolFont(uint c); 203 #ifdef __cplusplus 204 } 205 #endif 206 207 208 /* Use numeric constants as opposed to escape chars (\r, \n) 209 ** to avoid conflict Mac compilers that may re-define these. 210 */ 211 #define CR 0xD 212 #define LF 0xA 213 214 #if defined(MAC_OS_CLASSIC) 215 #define DEFAULT_NL_CONFIG TidyCR 216 #elif defined(_WIN32) || defined(OS2_OS) 217 #define DEFAULT_NL_CONFIG TidyCRLF 218 #else 219 #define DEFAULT_NL_CONFIG TidyLF 220 #endif 221 222 223 #endif /* __STREAMIO_H__ */ 224

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.