1 /******************************************************************************
2 *
3 * Copyright (C) 2004-2008, The Gentee Group. All rights reserved.
4 * This file is part of the Gentee open source project - http://www.gentee.com.
5 *
6 * THIS FILE IS PROVIDED UNDER THE TERMS OF THE GENTEE LICENSE ("AGREEMENT").
7 * ANY USE, REPRODUCTION OR DISTRIBUTION OF THIS FILE CONSTITUTES RECIPIENTS
8 * ACCEPTANCE OF THE AGREEMENT.
9 *
10 * Author: Alexander Krivonogov ( algen )
11 *
12 ******************************************************************************/
13
14 /*-----------------------------------------------------------------------------
15 * Id: xml L "XML"
16 *
17 * Summary: XML file processing. This library is used for XML file processing
18 and XML tree building. Neither a multibyte-character set nor a
19 document type description #b(#lgt[!DOCTYPE .....]) are handled in
20 the current version. For using this library, it is required to
21 specify the file xml.g (from lib\xml subfolder) with include
22 command. #srcg[
23 |include : $"...\gentee\lib\xml\xml.g"]
24 *
25 * List: *,xml_desc,
26 *#lng/opers#,xml_opfor,
27 *#lng/methods#,xml_addentity,xml_getroot,xml_procfile,xml_procstr,
28 *Methods of XML tree items,xmlitem_chtag,xmlitem_findtag,
29 xmlitem_getattrib,xmlitem_getchild,xmlitem_getchildtag,
30 xmlitem_getchildtext,xmlitem_getname,xmlitem_getnext,
31 xmlitem_getnexttag,xmlitem_getnexttext,xmlitem_getparent,
32 xmlitem_gettext,xmlitem_isemptytag,
33 xmlitem_ispitag,xmlitem_istag,xmlitem_istext
34 *
35 -----------------------------------------------------------------------------*/
36
// Number of columns (one per possible byte value) in each row of the
// parser table.
define {
   NUMSYM = 256
}
41
// Packed parser-table cell, one byte per field. xml.init reads an array of
// these from X_tblsrc and converts every cell into a pos record.
type posb {
   byte state      // Number of the next state (0 - none)
   byte afunc      // Action code; xml.init maps it to the address of an f_* function
   byte retstate   // Number of the state to be returned to later (0 - none)
}
48
// Expanded parser-table cell as stored in the global table tp.
// xml.process reads the fields by raw offsets 0, 4 and 8 and steps between
// cells with a 16-byte stride.
type pos {
   int  state      // Next state; after xml.init - the address of that state's row in tp
   uint afunc      // Action; after xml.init - the address of the f_* function to call
   int  retstate   // State pushed on the return stack; after xml.init - a row address
   uint r          // Not referenced in this file section
}
56
// Assignment of a packed cell (posb) to an expanded cell (pos).
// Copies state, afunc and retstate; the field r of l is left untouched.
// Returns l.
operator pos = ( pos l, posb r )
{
   l.retstate = r.retstate
   l.afunc    = r.afunc
   l.state    = r.state
   return l
}
64
define
{
   // Types of tree items (xmlitem.tgtype)
   TG_TEXT    = 0x01   // Text (tgstart - number of the first text, tgend - number of the last text)
   TG_TAG     = 0x10   // Tag (tgid - name identifier, tgstart - number of the first attribute, tgend - number of the last attribute)
   TG_QUEST   = 0x12   // <? ?>
   TG_NOCHILD = 0x14   // < />

   // Types of text pieces (xtext.txtype)
   TX_TEXT    = 0x01   // Plain text
   TX_SYMBOL  = 0x02   // Character; txaddr_code holds the code of the character to insert
   TX_ENTITY  = 0x03   // Entity; txaddr_code holds the code of the entity name in the entity hash table
}
// Item of the parse tree: either a text or a tag
type xmlitem
{
   uint tgtype    // Item type, TG_*
   uint tgid      // Identifier of the tag name in the hash table of tags
   uint tgstart   // Number of the first attribute/text in the table of attributes/texts
   uint tgend     // Number of the last attribute/text plus one in the table of attributes/texts
   uint nparent   // Number of the owner tag
   uint nnext     // Number of the next tag
   uint nchild    // Number of the first child
   uint xml       // Address of the owning xml object (assigned at the end of xml.process)
}
// Helper type declared with xmlitem as its index type.
// NOTE(review): not referenced in this file section; presumably used by the
// included xmluser.g for iterating over tags - confirm.
type xmltags <index=xmlitem>
{
   uint parent
   uint cur
}
90
// Item of the array of attributes
type xattrib
{
   uint attid      // Identifier of the attribute name
   uint attstart   // Number of the first text (attribute value) in the table of texts
   uint attend     // Number of the last text (attribute value) in the table of texts
}
97
// Item of the array of texts
type xtext
{
   uint txtype        // Type of the text, TX_*
   uint txaddr_code   // Address of the source text start / character code / entity name code
   uint txlen         // Length of the source text
}
104
// Object that parses an XML text
type xml {
   buf  src                  // Source text
   arr  tags of xmlitem      // Array/tree of tags
   arr  attribs of xattrib   // Array of attributes
   arr  texts of xtext       // Array of texts
   hash hnames               // Hash table of tag names
   arr  names of str         // Table of strings for the hash table hnames
   hash hentities            // Hash table of entity names
   uint err                  // Nonzero if the last processing failed (xml.process returns !err)
}
117
// Working state of the parser. It is shared by xml.init, xml.process and,
// presumably, by the action functions from the included xmlfuncs.g.
global {
   // Parser table: 256 cells per state row. It is declared with one row
   // and gets extended by xml.init.
   arr tp[1,256] of pos

   uint X_ia            // Address of the source byte being processed
   uint X_curtag        // Address of the current item of xml.tags
   uint X_curtext       // Address of the current item of xml.texts
   uint X_curattrib     // Address of the current item of xml.attribs
   uint X_ncurtag       // Number of the current tag
   uint X_nparenttag    // Number of the parent tag
   uint X_ncurtext      // Number of the current text
   uint X_ncurattrib    // Number of the current attribute
   uint X_maxtag        // Number of preallocated items in xml.tags
   uint X_maxattrib     // Number of preallocated items in xml.attribs
   uint X_maxtext       // Number of preallocated items in xml.texts
   uint X_maxstack      // Number of preallocated items in X_stacktags
   arr  X_stacktags of uint   // Stack of tags; cleared and preallocated by xml.process
   uint X_nstack        // Reset to 0 by xml.process; presumably the depth of X_stacktags - confirm
   uint X_curnameoff    // Not referenced in this file section
   uint X_x             // Address of the xml object being processed, 0 when idle
   // Packed source data of the parser table.
   // NOTE(review): presumably '\<sp.tbl>' embeds the file sp.tbl - confirm.
   buf  X_tblsrc = '\<sp.tbl>'
   str  X_sname         // Not referenced in this file section
   uint X_n             // Not referenced in this file section
   uint X_tparenttag    // Not referenced in this file section
}
134
135 include {
136 "xmlfuncs.g"
137 "xmluser.g"
138 }
139
/* Initialization of an xml object: builds the global parser table tp.

   The packed table X_tblsrc is viewed as rows of $NUMSYM posb cells. Row i
   of the packed data becomes row i+1 of tp. In each cell:
     - nonzero state and retstate numbers are turned into absolute addresses
       of the corresponding tp rows (number << 12, plus the address of tp;
       xml.process starts from row 1 as tp.ptr() + (1 << 12));
     - the action code is replaced with the address of the matching f_*
       function.

   The table depends only on the constant X_tblsrc, so it is built once.
   Without the guard below every further initialization would expand the
   global tp by one more full set of rows and rebuild the same contents.
*/
method xml.init()
{
   uint i, j
   arr ar[0,$NUMSYM] of posb

   // tp is declared with a single row of $NUMSYM cells; more cells mean
   // that the table has already been built.
   if *tp > $NUMSYM : return

   ar->buf = X_tblsrc
   tp.expand( (*ar/$NUMSYM)*256 )
   fornum i = 0, *ar/$NUMSYM
   {
      fornum j = 0, $NUMSYM
      {
         // Copy state, afunc and retstate of the packed cell
         tp[i+1,j] = ar[i,j]

         // NOTE(review): state and retstate are byte fields, so the
         // comparisons with -1 below probably never fail - confirm.
         if ar[i,j].state && ar[i,j].state !=-1
         {
            // State number -> address of the state's row in tp
            tp[i+1,j].state <<= 12
            tp[i+1,j].state += tp.ptr()
         }
         if ar[i,j].retstate && ar[i,j].retstate !=-1
         {
            // Return state number -> address of the state's row in tp
            tp[i+1,j].retstate <<= 12
            tp[i+1,j].retstate += tp.ptr()
         }
         // Action code -> address of the action function.
         // Codes not listed here keep their copied value.
         switch ar[i,j].afunc
         {
            case 1   : tp[i+1,j].afunc = &f_begent
            case 2   : tp[i+1,j].afunc = &f_endent
            case 3   : tp[i+1,j].afunc = &f_endentnum
            case 4   : tp[i+1,j].afunc = &f_endenthex
            case 5   : tp[i+1,j].afunc = &f_begatrval
            case 6   : tp[i+1,j].afunc = &f_endatrval
            case 7   : tp[i+1,j].afunc = &f_begquest
            case 8   : tp[i+1,j].afunc = &f_endquest
            case 9   : tp[i+1,j].afunc = &f_endtagname
            case 10  : tp[i+1,j].afunc = &f_begatr
            case 11  : tp[i+1,j].afunc = &f_endatr
            case 12  : tp[i+1,j].afunc = &f_begtag
            case 13  : tp[i+1,j].afunc = &f_endtag
            case 14  : tp[i+1,j].afunc = &f_endtagend
            case 15  : tp[i+1,j].afunc = &f_begendtag
            case 16  : tp[i+1,j].afunc = &f_begendtagend
            case 17  : tp[i+1,j].afunc = &f_begcdata
            case 18  : tp[i+1,j].afunc = &f_endcdata
            case 255 : tp[i+1,j].afunc = &f_error
         }
      }
   }
}
186
/* Parses this.src and fills the tables of tags, texts and attributes.

   The source is scanned byte by byte with the table tp: the cell selected
   by the current state and the current byte supplies an optional action
   function, the next state and an optional state to return to later.
   Only one xml object can be processed at a time (guarded by X_x).

   Returns 0 if another processing is already running, otherwise !this.err.
*/
method uint xml.process()
{
   uint cell                     // Address of the current table cell
   arr retstack[512] of uint     // Stack of states to return to
   uint state, retstate
   uint afunc

   // The initial state is row 1 of the table (4096 bytes per row)
   state = tp.ptr() + (1 << 12)

   // Initialization
   if X_x : return 0
   X_x = &this
   this.err = 0
   this.hnames.clear()
   this.hentities.clear()
   this.tags.clear()
   this.texts.clear()
   this.names.clear()
   this.attribs.clear()

   this.hnames.ignorecase()
   this.hentities.ignorecase()
   this.names.expand( 1 )

   // Preallocate the tables in proportion to the source size
   X_maxtag = max( *this.src/10, 100 )
   this.tags.expand( X_maxtag )
   X_maxtext = max( *this.src/20, 100 )
   this.texts.expand( X_maxtext )
   X_maxattrib = max( *this.src/40, 100 )
   this.attribs.expand( X_maxattrib )

   X_ncurattrib = 0
   X_curattrib = &this.attribs[ X_ncurattrib ]

   X_ncurtext = 0
   X_curtext = &this.texts[ X_ncurtext ]

   X_ncurtag = 0
   // Root item
   X_curtag = &this.tags[ X_ncurtag ]
   X_curtag->xmlitem.tgtype = 0
   X_curtag->xmlitem.nchild = ++X_ncurtag

   // Add a blank tag to be filled in
   X_curtag = &this.tags[ X_ncurtag ]
   X_curtag->xmlitem.nchild = 1
   X_curtag->xmlitem.tgstart = X_ncurtext
   X_nparenttag = 0

   X_ia = this.src.ptr()
   X_stacktags.clear()
   X_maxstack = 100
   X_stacktags.expand( X_maxstack )
   X_nstack = 0

   // Begin an empty text
   X_curtext->xtext.txaddr_code = X_ia
   X_curtext->xtext.txtype = 0

   uint sp = retstack.ptr()      // Top of the stack of return states
   fornum X_ia, this.src.ptr() + *this.src
   {
      // A cell takes 16 bytes: next state at +0, action at +4,
      // return state at +8
      cell = state + ( X_ia->byte << 4 )

      afunc = ( cell + 4 )->uint
      if afunc : afunc->func()

      state = cell->uint
      if state
      {
         // Remember the state to return to, if the cell specifies one
         retstate = ( cell + 8 )->uint
         if retstate
         {
            sp->uint = retstate
            sp += 4
         }
      }
      else
      {
         // No next state: return to the state on the top of the stack
         sp -= 4
         state = sp->uint
      }
   }

   if !X_curtext->xtext.txtype &&
      X_curtext->xtext.txaddr_code != X_ia
   {
      // Finish the trailing plain text
      X_curtext->xtext.txlen = X_ia - X_curtext->xtext.txaddr_code
      X_curtext->xtext.txtype = $TX_TEXT
      X_curtag->xmlitem.tgtype = $TG_TEXT
      X_curtag->xmlitem.tgend = X_ncurtext + 1
   }

   // NOTE(review): presumably these calls drop the unused preallocated
   // items - confirm against the arr.del semantics.
   this.tags.del( X_ncurtag + 1 )
   this.texts.del( X_ncurtext + 1 )
   this.attribs.del( X_ncurattrib + 1 )

   // Every item keeps the address of its xml object
   uint k
   fornum k = 0, *this.tags
   {
      this.tags[ k ].xml = &this
   }

   X_x = 0
   if !this.err
   {
      // Predefined entities
      this.addentity( "amp", "&" )
      this.addentity( "quot", "\"" )
      this.addentity( "apos", "'" )
      this.addentity( "gt", ">" )
      this.addentity( "lt", "<" )
   }
   return !this.err
}
288
/*-----------------------------------------------------------------------------
* Id: xml_procfile F2
*
* Summary: Process an XML file. Reads the XML file, the name of which is
           specified as a parameter, and processes it.
*
* Params: filename - Name of the file processed.
*
* Return: #lng/retf#
*
-----------------------------------------------------------------------------*/

method uint xml.procfile( str filename )
{
   // Nothing to process if the read returns zero
   if !this.src.read( filename ) : return 0

   return this.process()
}
309
/*-----------------------------------------------------------------------------
* Id: xml_procstr F2
*
* Summary: Processes a string containing the XML document.
*
* Params: src - XML data string.
*
* Return: #lng/retf#
*
-----------------------------------------------------------------------------*/

method uint xml.procstr( str src )
{
   uint result

   // Copy the string data into the source buffer of the object
   this.src = src->buf
   result = this.process()
   return result
}
326
327 /*-----------------------------------------------------------------------------
328 ** Id: xml_desc F1
329 *
* Summary: A brief description of the XML library. Variables of both the #b(xml)
           and the #b(xmlitem) type (an XML tree item) are used for processing
332 XML documents. An XML tree item can be of two types: a #b(text item)
333 and a #b(tag item). There are several types of tag items:
334 #ul[
335 |tag item that contains other items #b(#lgt[tag ...].....#lgt[/tag]);
336 |tag item that contains no other items #b(#lgt[tag .../]);
337 |tag item of processing instruction #b(#lgt[?tag ...?]).
338 ]
339 #p[A tag item may contain attributes.]
340
341 #p[The sequence of operations for processing an XML document:]
342 #ul[
343 process a document (build an XML tree) with the help of the #a(xml_procfile)
344 | method or the #a(xml_procstr) method;
345 |add entity definitions, using the #a(xml_addentity) method if necessary;
346 search for the required items in the XML tree using the following methods:
347 #a(xml_getroot), #a(xmlitem_chtag), #a(xmlitem_findtag),
348 | #a(xmlitem_getnext), etc.;
349 use the #b(foreach) statement in order to process similar elements if
350 | necessary;
351 gain access to tag attributes with the help of the #a(xmlitem_getattrib)
352 | method and get a text using the #a(xmlitem_gettext) method.
353 ]
354 *
355 * Title: XML description
356 *
357 * Define:
358 *
359 -----------------------------------------------------------------------------*/
360
361 //----------------------------------------------------------------------------