~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

TidyLib
tidy/src/streamio.h

Version: ~ [ 1.0 ] ~

  1 #ifndef __STREAMIO_H__
  2 #define __STREAMIO_H__
  3 
  4 /* streamio.h -- handles character stream I/O
  5 
  6   (c) 1998-2005 (W3C) MIT, ERCIM, Keio University
  7   See tidy.h for the copyright notice.
  8 
  9   CVS Info :
 10 
 11     $Author: arnaud02 $ 
 12     $Date: 2005/03/03 12:49:24 $ 
 13     $Revision: 1.14 $ 
 14 
 15   Wrapper around Tidy input source and output sink
 16   that calls appropriate interfaces, and applies 
 17   necessary char encoding transformations: to/from
 18   ISO-10646 and/or UTF-8.
 19 
 20 */
 21 
 22 #include "forward.h"
 23 #include "buffio.h"
 24 #include "fileio.h"
 25 
 26 #ifdef __cplusplus
 27 extern "C"
 28 {
 29 #endif
 30 typedef enum             /* kind of backing store behind a stream; type of the iotype fields in the stream structs */
 31 {
 32   FileIO,                /* NOTE(review): presumably used for streams made by FileInput/FileOutput -- confirm in streamio.c */
 33   BufferIO,              /* NOTE(review): presumably used for streams made by BufferInput/BufferOutput -- confirm */
 34   UserIO                 /* NOTE(review): presumably used for streams made by UserInput/UserOutput -- confirm */
 35 } IOType;
 36 
 37 /* states for ISO 2022
 38 
 39  A document in ISO-2022 based encoding uses some ESC sequences called
 40  "designator" to switch character sets. The designators defined and
 41  used in ISO-2022-JP are:
 42 
 43     "ESC" + "(" + ?     for ISO646 variants
 44 
 45     "ESC" + "$" + ?     and
 46     "ESC" + "$" + "(" + ?   for multibyte character sets
 47 */
 48 typedef enum             /* type of the `state` field in both the input and output stream structs */
 49 {
 50   FSM_ASCII,             /* first enumerator (value 0); NOTE(review): presumably the plain-ASCII state -- confirm */
 51   FSM_ESC,               /* NOTE(review): presumably "ESC" has been seen -- confirm in streamio.c */
 52   FSM_ESCD,              /* NOTE(review): presumably "ESC" + "$" has been seen -- confirm */
 53   FSM_ESCDP,             /* NOTE(review): presumably "ESC" + "$" + "(" has been seen -- confirm */
 54   FSM_ESCP,              /* NOTE(review): presumably "ESC" + "(" has been seen -- confirm */
 55   FSM_NONASCII           /* NOTE(review): presumably inside a multibyte character set -- confirm */
 56 } ISO2022State;
 57 
 58 /************************
 59 ** Source
 60 ************************/
 61 
 62 #define CHARBUF_SIZE 5     /* NOTE(review): presumably the initial capacity of charbuf below -- confirm in streamio.c */
 63 
 64 /* non-raw input is cleaned up*/
 65 struct _StreamIn           /* state of one input stream; NOTE(review): presumably typedef'd as StreamIn in forward.h */
 66 {
 67     ISO2022State    state;     /* FSM for ISO2022 */
 68     Bool   pushed;             /* NOTE(review): presumably set while pushed-back chars are pending (see UngetChar/PopChar) -- confirm */
 69     tchar* charbuf;            /* NOTE(review): presumably storage for pushed-back chars -- confirm */
 70     uint   bufpos;             /* NOTE(review): presumably number of entries in use in charbuf -- confirm */
 71     uint   bufsize;            /* NOTE(review): presumably allocated capacity of charbuf -- confirm */
 72     int    tabs;               /* NOTE(review): purpose not visible in this header -- see streamio.c */
 73     int    lastcol;            /* NOTE(review): presumably column before the most recent char -- confirm */
 74     int    curcol;             /* NOTE(review): presumably current column in the input -- confirm */
 75     int    curline;            /* NOTE(review): presumably current line in the input -- confirm */
 76     int    encoding;           /* NOTE(review): presumably one of the character encoding ids defined in this header (RAW, ASCII, ...) */
 77     IOType iotype;             /* which kind of backing store this stream reads from */
 78 
 79     TidyInputSource source;    /* held by value, not by pointer */
 80 
 81 #ifdef TIDY_WIN32_MLANG_SUPPORT
 82     ulong  mlang;              /* member exists only when TIDY_WIN32_MLANG_SUPPORT is defined */
 83 #endif
 84 
 85 #ifdef TIDY_STORE_ORIGINAL_TEXT
 86     tmbstr otextbuf;           /* these three members exist only when TIDY_STORE_ORIGINAL_TEXT is defined */
 87     size_t otextsize;          /* note: size_t here, while otextlen below is uint */
 88     uint   otextlen;
 89 #endif
 90 
 91     /* Pointer back to document for error reporting */
 92     TidyDocImpl* doc;
 93 };
 94 
 95 void freeStreamIn(StreamIn* in);   /* NOTE(review): presumably releases a StreamIn obtained from the constructors below -- confirm ownership in streamio.c */
 96 
 97 StreamIn* FileInput( TidyDocImpl* doc, FILE* fp, int encoding );                /* input stream over a stdio FILE* */
 98 StreamIn* BufferInput( TidyDocImpl* doc, TidyBuffer* content, int encoding );   /* input stream over a TidyBuffer */
 99 StreamIn* UserInput( TidyDocImpl* doc, TidyInputSource* source, int encoding ); /* input stream over a caller-supplied TidyInputSource */
100 
101 int       ReadBOMEncoding(StreamIn *in);      /* NOTE(review): presumably returns an encoding id detected from a byte order mark; failure value not visible here */
102 uint      ReadChar( StreamIn* in );           /* NOTE(review): presumably returns the next decoded character -- confirm end-of-input value */
103 void      UngetChar( uint c, StreamIn* in );  /* note the argument order: character first, stream second */
104 uint      PopChar( StreamIn *in );            /* NOTE(review): presumably takes back a char pushed by UngetChar -- confirm */
105 Bool      IsEOF( StreamIn* in );
106 
107 
108 /************************
109 ** Sink
110 ************************/
111 
112 struct _StreamOut              /* state of one output stream; NOTE(review): presumably typedef'd as StreamOut in forward.h */
113 {
114     int   encoding;            /* NOTE(review): presumably one of the character encoding ids defined in this header */
115     ISO2022State   state;     /* for ISO 2022 */
116     uint  nl;                  /* NOTE(review): presumably the newline convention passed as `newln` to the *Output constructors -- confirm */
117 
118 #ifdef TIDY_WIN32_MLANG_SUPPORT
119     ulong mlang;               /* member exists only when TIDY_WIN32_MLANG_SUPPORT is defined */
120 #endif
121 
122     IOType iotype;             /* which kind of backing store this stream writes to */
123     TidyOutputSink sink;       /* held by value, not by pointer */
124 };
125 
126 StreamOut* FileOutput( FILE* fp, int encoding, uint newln );            /* output stream over a stdio FILE* */
127 StreamOut* BufferOutput( TidyBuffer* buf, int encoding, uint newln );   /* output stream over a TidyBuffer */
128 StreamOut* UserOutput( TidyOutputSink* sink, int encoding, uint newln ); /* output stream over a caller-supplied TidyOutputSink */
129 
130 StreamOut* StdErrOutput(void);   /* NOTE(review): presumably a stream bound to stderr; whether it may be released is not visible here */
131 StreamOut* StdOutOutput(void);   /* NOTE(review): presumably a stream bound to stdout; same ownership question */
132 void       ReleaseStreamOut( StreamOut* out );
133 
134 void WriteChar( uint c, StreamOut* out );   /* note the argument order: character first, stream second */
135 void outBOM( StreamOut *out );              /* NOTE(review): presumably writes a byte order mark for out's encoding -- confirm */
136 
137 ctmbstr GetEncodingNameFromTidyId(uint id);      /* NOTE(review): id is presumably one of the encoding ids defined in this header; result for unknown ids not visible here */
138 ctmbstr GetEncodingOptNameFromTidyId(uint id);   /* NOTE(review): presumably the config-option spelling of the same encoding -- confirm */
139 int GetCharEncodingFromOptName(ctmbstr charenc); /* NOTE(review): presumably the inverse lookup; failure value not visible here */
140 
141 /************************
142 ** Misc
143 ************************/
144 
145 /* character encodings
146 */
147 #define RAW         0
148 #define ASCII       1
149 #define LATIN0      2
150 #define LATIN1      3
151 #define UTF8        4
152 #define ISO2022     5
153 #define MACROMAN    6
154 #define WIN1252     7
155 #define IBM858      8
156 
157 #if SUPPORT_UTF16_ENCODINGS   /* tested with #if, not #ifdef: must be defined to a nonzero value to enable */
158 #define UTF16LE     9
159 #define UTF16BE     10
160 #define UTF16       11
161 #endif
162 
163 /* Note that Big5 and SHIFTJIS are not converted to ISO 10646 codepoints
164 ** (i.e., to Unicode) before being recoded into UTF-8. This may be
165 ** confusing: usually UTF-8 implies ISO10646 codepoints.
166 */
167 #if SUPPORT_ASIAN_ENCODINGS   /* also a value test: an undefined macro evaluates to 0 here */
168 #if SUPPORT_UTF16_ENCODINGS   /* ids 9-11 are taken by the UTF-16 variants, so continue at 12 */
169 #define BIG5        12
170 #define SHIFTJIS    13
171 #else                         /* no UTF-16 ids defined: BIG5/SHIFTJIS take 9 and 10 instead */
172 #define BIG5        9
173 #define SHIFTJIS    10
174 #endif                        /* consequence: the numeric value of BIG5/SHIFTJIS differs between build configurations */
175 #endif
176 
177 #ifdef TIDY_WIN32_MLANG_SUPPORT
178 /* hack: windows code page numbers start at 37 */
179 #define WIN32MLANG  36
180 #endif
181 
182 
183 /* char encoding used when replacing illegal SGML chars,
184 ** regardless of specified encoding.  Set at compile time
185 ** to either Windows or Mac.
186 */
187 extern const int ReplacementCharEncoding;   /* declaration only; the definition lives in another translation unit */
188 
189 /* Function for conversion from Windows-1252 to Unicode */
190 uint DecodeWin1252(uint c);
191 
192 /* Function to convert from MacRoman to Unicode */
193 uint DecodeMacRoman(uint c);
194 
195 /* Function for conversion from OS/2-850 to Unicode */
196 uint DecodeIbm850(uint c);   /* NOTE(review): name and comment say 850 while the encoding id in this header is IBM858 -- confirm which code page is implemented */
197 
198 /* Function for conversion from Latin0 to Unicode */
199 uint DecodeLatin0(uint c);
200 
201 /* Function to convert from Symbol Font chars to Unicode */
202 uint DecodeSymbolFont(uint c);
203 #ifdef __cplusplus
204 }
205 #endif
206 
207 
208 /* Use numeric constants as opposed to escape chars (\r, \n)
 209 ** to avoid conflict with Mac compilers that may re-define these.
210 */
211 #define CR    0xD   /* carriage return, decimal 13 */
212 #define LF    0xA   /* line feed, decimal 10 */
213 
214 #if   defined(MAC_OS_CLASSIC)          /* exactly one branch defines DEFAULT_NL_CONFIG */
215 #define DEFAULT_NL_CONFIG TidyCR       /* TidyCR / TidyCRLF / TidyLF are not defined in this header */
216 #elif defined(_WIN32) || defined(OS2_OS)
217 #define DEFAULT_NL_CONFIG TidyCRLF
218 #else                                  /* every other platform */
219 #define DEFAULT_NL_CONFIG TidyLF
220 #endif
221 
222 
223 #endif /* __STREAMIO_H__ */
224 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.