Version:
~ [ 1.0 ] ~
1 #ifndef __STREAMIO_H__
2 #define __STREAMIO_H__
3
/* streamio.h -- handles character stream I/O

  (c) 1998-2005 (W3C) MIT, ERCIM, Keio University
  See tidy.h for the copyright notice.

  CVS Info :

    $Author: arnaud02 $
    $Date: 2005/03/03 12:49:24 $
    $Revision: 1.14 $

  Wrapper around Tidy input source and output sink
  that calls appropriate interfaces, and applies
  necessary char encoding transformations: to/from
  ISO-10646 and/or UTF-8.

*/
21
22 #include "forward.h"
23 #include "buffio.h"
24 #include "fileio.h"
25
26 #ifdef __cplusplus
27 extern "C"
28 {
29 #endif
/* Kind of backing store a stream is attached to.
** Enumerators keep their implicit values: FileIO = 0, BufferIO = 1,
** UserIO = 2.
*/
typedef enum
{
    FileIO,     /* presumably set by FileInput/FileOutput -- confirm in streamio.c */
    BufferIO,   /* presumably set by BufferInput/BufferOutput -- confirm in streamio.c */
    UserIO      /* presumably set by UserInput/UserOutput -- confirm in streamio.c */
} IOType;
36
/* states for ISO 2022

   A document in ISO-2022 based encoding uses some ESC sequences called
   "designator" to switch character sets. The designators defined and
   used in ISO-2022-JP are:

     "ESC" + "(" + ?       for ISO646 variants

     "ESC" + "$" + ?       and
     "ESC" + "$" + "(" + ? for multibyte character sets

   NOTE(review): the per-state notes below are inferred from the state
   names and the designator list above; confirm against the state
   machine in streamio.c.
*/
typedef enum
{
    FSM_ASCII,      /* presumably: single-byte (ISO646/ASCII) set active */
    FSM_ESC,        /* presumably: "ESC" seen */
    FSM_ESCD,       /* presumably: "ESC" + "$" seen */
    FSM_ESCDP,      /* presumably: "ESC" + "$" + "(" seen */
    FSM_ESCP,       /* presumably: "ESC" + "(" seen */
    FSM_NONASCII    /* presumably: multibyte character set active */
} ISO2022State;
57
/************************
** Source
************************/

/* NOTE(review): presumably the initial element count of the
** StreamIn charbuf pushback buffer -- confirm in streamio.c.
*/
#define CHARBUF_SIZE 5
63
64 /* non-raw input is cleaned up*/
65 struct _StreamIn
66 {
67 ISO2022State state; /* FSM for ISO2022 */
68 Bool pushed;
69 tchar* charbuf;
70 uint bufpos;
71 uint bufsize;
72 int tabs;
73 int lastcol;
74 int curcol;
75 int curline;
76 int encoding;
77 IOType iotype;
78
79 TidyInputSource source;
80
81 #ifdef TIDY_WIN32_MLANG_SUPPORT
82 ulong mlang;
83 #endif
84
85 #ifdef TIDY_STORE_ORIGINAL_TEXT
86 tmbstr otextbuf;
87 size_t otextsize;
88 uint otextlen;
89 #endif
90
91 /* Pointer back to document for error reporting */
92 TidyDocImpl* doc;
93 };
94
95 void freeStreamIn(StreamIn* in);
96
97 StreamIn* FileInput( TidyDocImpl* doc, FILE* fp, int encoding );
98 StreamIn* BufferInput( TidyDocImpl* doc, TidyBuffer* content, int encoding );
99 StreamIn* UserInput( TidyDocImpl* doc, TidyInputSource* source, int encoding );
100
101 int ReadBOMEncoding(StreamIn *in);
102 uint ReadChar( StreamIn* in );
103 void UngetChar( uint c, StreamIn* in );
104 uint PopChar( StreamIn *in );
105 Bool IsEOF( StreamIn* in );
106
107
108 /************************
109 ** Sink
110 ************************/
111
112 struct _StreamOut
113 {
114 int encoding;
115 ISO2022State state; /* for ISO 2022 */
116 uint nl;
117
118 #ifdef TIDY_WIN32_MLANG_SUPPORT
119 ulong mlang;
120 #endif
121
122 IOType iotype;
123 TidyOutputSink sink;
124 };
125
126 StreamOut* FileOutput( FILE* fp, int encoding, uint newln );
127 StreamOut* BufferOutput( TidyBuffer* buf, int encoding, uint newln );
128 StreamOut* UserOutput( TidyOutputSink* sink, int encoding, uint newln );
129
130 StreamOut* StdErrOutput(void);
131 StreamOut* StdOutOutput(void);
132 void ReleaseStreamOut( StreamOut* out );
133
134 void WriteChar( uint c, StreamOut* out );
135 void outBOM( StreamOut *out );
136
137 ctmbstr GetEncodingNameFromTidyId(uint id);
138 ctmbstr GetEncodingOptNameFromTidyId(uint id);
139 int GetCharEncodingFromOptName(ctmbstr charenc);
140
/************************
** Misc
************************/

/* character encodings
**
** Ids 0-8 are always defined. The UTF-16 ids (9-11) exist only when
** SUPPORT_UTF16_ENCODINGS is non-zero, and the ids of BIG5/SHIFTJIS
** shift down to 9/10 when UTF-16 support is compiled out.
*/
#define RAW         0
#define ASCII       1
#define LATIN0      2
#define LATIN1      3
#define UTF8        4
#define ISO2022     5
#define MACROMAN    6
#define WIN1252     7
#define IBM858      8

#if SUPPORT_UTF16_ENCODINGS
#define UTF16LE     9
#define UTF16BE     10
#define UTF16       11
#endif

/* Note that Big5 and SHIFTJIS are not converted to ISO 10646 codepoints
** (i.e., to Unicode) before being recoded into UTF-8. This may be
** confusing: usually UTF-8 implies ISO10646 codepoints.
*/
#if SUPPORT_ASIAN_ENCODINGS
#if SUPPORT_UTF16_ENCODINGS
#define BIG5        12
#define SHIFTJIS    13
#else
#define BIG5        9
#define SHIFTJIS    10
#endif
#endif

#ifdef TIDY_WIN32_MLANG_SUPPORT
/* hack: windows code page numbers start at 37 */
#define WIN32MLANG  36
#endif
181
182
183 /* char encoding used when replacing illegal SGML chars,
184 ** regardless of specified encoding. Set at compile time
185 ** to either Windows or Mac.
186 */
187 extern const int ReplacementCharEncoding;
188
189 /* Function for conversion from Windows-1252 to Unicode */
190 uint DecodeWin1252(uint c);
191
192 /* Function to convert from MacRoman to Unicode */
193 uint DecodeMacRoman(uint c);
194
195 /* Function for conversion from OS/2-850 to Unicode */
196 uint DecodeIbm850(uint c);
197
198 /* Function for conversion from Latin0 to Unicode */
199 uint DecodeLatin0(uint c);
200
201 /* Function to convert from Symbol Font chars to Unicode */
202 uint DecodeSymbolFont(uint c);
203 #ifdef __cplusplus
204 }
205 #endif
206
207
/* Use numeric constants as opposed to escape chars (\r, \n)
** to avoid conflict with Mac compilers that may re-define these.
*/
#define CR    0xD
#define LF    0xA

/* Platform default for the newline setting: CR on classic Mac OS,
** CRLF on Windows and OS/2, LF everywhere else. The TidyCR/TidyCRLF/
** TidyLF values are declared outside this header.
*/
#if   defined(MAC_OS_CLASSIC)
#define DEFAULT_NL_CONFIG TidyCR
#elif defined(_WIN32) || defined(OS2_OS)
#define DEFAULT_NL_CONFIG TidyCRLF
#else
#define DEFAULT_NL_CONFIG TidyLF
#endif
221
222
223 #endif /* __STREAMIO_H__ */
224
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more
information.