1 /******************************************************************************
2 *
3 * Copyright (C) 2004-2007, The Gentee Group. All rights reserved.
4 * This file is part of the Gentee open source project - http://www.gentee.com.
5 *
6 * THIS FILE IS PROVIDED UNDER THE TERMS OF THE GENTEE LICENSE ("AGREEMENT").
7 * ANY USE, REPRODUCTION OR DISTRIBUTION OF THIS FILE CONSTITUTES RECIPIENTS
8 * ACCEPTANCE OF THE AGREEMENT.
9 *
10 * Author: Alexander Krivonogov ( algen )
11 *
12 ******************************************************************************/
13
14 /*-----------------------------------------------------------------------------
15 * Id: stringuni L "String - Unicode"
16 *
17 * Summary: Unicode strings. It is possible to use variables of the #b(ustr)
18 type for working with Unicode strings. The #b(ustr) type is
19 inherited from the #b(buf) type. So, you can also use
20 #a(buffer, methods of the buf type).
21 *
22 * List: *Operators,ustr_oplen,ustr_opind,ustr_opsum,ustr_opeq,ustr_opeqa,
23 ustr_opadd,ustr_opeqeq,ustr_opless,ustr_opgr,ustr_str2ustr,
24 ustr_ustr2str,
25 *Methods,ustr_clear,ustr_copy,ustr_del,ustr_findch,ustr_fromutf8,
26 ustr_insert,ustr_lines,ustr_read,ustr_replace,ustr_reserve,
27 ustr_setlen,ustr_split,ustr_substr,ustr_toutf8,ustr_trim,ustr_write
28 *
29 -----------------------------------------------------------------------------*/
30
// Code-page identifiers and conversion flag handed to MultiByteToWideChar and
// WideCharToMultiByte by the conversion routines in this file.
define {
   CP_ACP         = 0       // code page used by the str <-> ustr assignments
   CP_UTF8        = 65001   // code page used by toutf8 / fromutf8
   MB_PRECOMPOSED = 1       // flag passed together with CP_ACP
}
36
37 /*-----------------------------------------------------------------------------
38 * Id: tustr T ustr
39 *
40 * Summary: The Unicode string type.
41 *
42 -----------------------------------------------------------------------------*/
43
// Unicode string type: inherits buf and is indexed element-wise as ushort.
type ustr <index = ushort inherit = buf>
{
}
48
49 /*-----------------------------------------------------------------------------
50 * Id: ustr_opless F4
51 *
52 * Summary: Comparison operation.
53 *
54 * Title: ustr < ustr
55 *
56 * Return: Returns #b(1) if the first string is less than the second one.
57 Otherwise, it returns #b(0).
58 *
59 -----------------------------------------------------------------------------*/
60
operator uint <( ustr left, ustr right )
{
   // CompareStringW never returns a negative value: it yields 0 on failure,
   // 1 (CSTR_LESS_THAN), 2 (CSTR_EQUAL) or 3 (CSTR_GREATER_THAN).
   // "left < right" therefore corresponds to a result of exactly 1.
   uint cmp = CompareStringW( 0, 0, left.ptr(), *left, right.ptr(), *right )

   if cmp == 1 : return 1
   return 0
}
67
68 /*-----------------------------------------------------------------------------
69 * Id: ustr_opless_1 FC
70 *
71 * Summary: Comparison operation.
72 *
73 * Title: ustr <= ustr
74 *
* Return: Returns #b(1) if the first string is less than or equal to the second one.
76 Otherwise, it returns #b(0).
77 *
78 * Define: operator uint <=( ustr left, ustr right )
79 *
80 -----------------------------------------------------------------------------*/
81
82 /*-----------------------------------------------------------------------------
83 * Id: ustr_opgr F4
84 *
85 * Summary: Comparison operation.
86 *
87 * Title: ustr > ustr
88 *
89 * Return: Returns #b(1) if the first string is greater than the second one.
90 Otherwise, it returns #b(0).
91 *
92 -----------------------------------------------------------------------------*/
93
operator uint >( ustr left, ustr right )
{
   // CompareStringW yields 0 on failure, 1 (CSTR_LESS_THAN), 2 (CSTR_EQUAL)
   // or 3 (CSTR_GREATER_THAN). Testing "> 0" would also accept "less" and
   // "equal", so only a result of exactly 3 means "left > right".
   uint cmp = CompareStringW( 0, 0, left.ptr(), *left, right.ptr(), *right )

   if cmp == 3 : return 1
   return 0
}
100
101 /*-----------------------------------------------------------------------------
102 * Id: ustr_opgr_1 FC
103 *
104 * Summary: Comparison operation.
105 *
106 * Title: ustr >= ustr
107 *
* Return: Returns #b(1) if the first string is greater than or equal to the second one.
109 Otherwise, it returns #b(0).
110 *
111 * Define: operator uint >=( ustr left, ustr right )
112 *
113 -----------------------------------------------------------------------------*/
114
115 /*
116 operator uint %==( ustr left right )
117 {
118 if *left != *right : return 0
119 return !ustrcmpign( left.ptr(), right.ptr())
120 }
121
122 operator uint %<( ustr left right )
123 {
124 if ustrcmpign( left.ptr(), right.ptr()) < 0 : return 1
125 return 0
126 }
127
128 operator uint %>( ustr left right )
129 {
130 if ustrcmpign( left.ptr(), right.ptr() ) > 0 : return 1
131 return 0
132 }*/
133
134 /*-----------------------------------------------------------------------------
135 * Id: ustr_opind F4
136 *
137 * Summary: Getting ushort character #b([i]) of the Unicode string.
138 *
139 * Title: ustr[ i ]
140 *
141 * Return: The #b([i]) ushort character of the Unicode string.
142 *
143 -----------------------------------------------------------------------------*/
144
method uint ustr.index( uint id )
{
   // Every element occupies two bytes, so element id starts at byte id * 2.
   uint offset = id << 1

   return this.ptr() + offset
}
149
150 /*-----------------------------------------------------------------------------
151 * Id: ustr_oplen F4
152 *
153 * Summary: Get the length of a unicode string.
154 *
155 * Return: The length of the unicode string.
156 *
157 * Define: operator uint *( ustr left )
158 *
159 -----------------------------------------------------------------------------*/
160
operator uint *( ustr src )
{
   // use is the occupied size in bytes; halve it to get ushort units and
   // leave out the terminating zero unit.
   uint units = src.use >> 1

   return units - 1
}
165
166 /*-----------------------------------------------------------------------------
167 * Id: ustr_reserve F2
168 *
169 * Summary: Memory reservation. The method increases the size of the memory
170 allocated for the unicode string.
171 *
* Params: len - The total requested length of the unicode string. If it is /
173 less than the current size, nothing happens. If the size is /
174 increased, the current string data is saved.
175 *
176 * Return: #lng/retobj#
177 *
178 -----------------------------------------------------------------------------*/
179
method ustr.reserve( uint len )
{
   // len is given in ushort units; the underlying buf works in bytes.
   uint bytes = len << 1

   this->buf.reserve( bytes )
}
184
185 /*-----------------------------------------------------------------------------
186 * Id: ustr_opeq F4
187 *
188 * Summary: Assign types to unicode string. Copy a string to the unicode string
189 #b(ustr = str).
190 *
191 * Title: ustr = type
192 *
193 * Return: The result unicode string.
194 *
195 -----------------------------------------------------------------------------*/
196
operator ustr =( ustr left, str right )
{
   // The first call has a zero-sized output buffer, so it only reports the
   // number of wide characters required; one more is added for the zero.
   uint count = MultiByteToWideChar( $CP_ACP, $MB_PRECOMPOSED, right.ptr(),
                                     *right, left.ptr(), 0 ) + 1
   uint bytes = count << 1

   left.reserve( count )
   // The output size argument is a character count, not a byte count, so
   // pass count rather than bytes to avoid overstating the buffer.
   MultiByteToWideChar( $CP_ACP, $MB_PRECOMPOSED, right.ptr(), *right,
                        left.ptr(), count )
   // Write the terminating zero into the last reserved unit.
   ( &(( left->buf )[ bytes - 2 ]) )->ushort = 0
   left.use = bytes
   return left
}
209
210 /*-----------------------------------------------------------------------------
211 * Id: ustr_opeqa F4
212 *
213 * Summary: Copy a unicode string to a string.
214 *
215 * Title: str = ustr
216 *
217 * Return: The result string.
218 *
219 -----------------------------------------------------------------------------*/
220
operator str =( str left, ustr right )
{
   uint needed

   // Measuring pass: a zero-sized output buffer makes the call report the
   // number of bytes the converted text requires.
   needed = WideCharToMultiByte( $CP_ACP, 0, right.ptr(), *right,
                                 left.ptr(), 0, 0, 0 )
   left.reserve( needed + 1 )
   // Converting pass into the reserved space.
   WideCharToMultiByte( $CP_ACP, 0, right.ptr(), *right,
                        left.ptr(), needed + 1, 0, 0 )
   left.setlen( needed )
   return left
}
232
233 /*-----------------------------------------------------------------------------
234 * Id: ustr_setlen F2
235 *
236 * Summary: Setting a new size of the unicode string. The method does not
237 reserve space.
238 You cannot specify the size of a string greater than the reserved
239 space you have. Mostly, this function is used for specifying the
240 size of a string after external functions write data to it.
241 *
242 * Params: len - New string size.
243 *
244 * Return: #lng/retobj#
245 *
246 -----------------------------------------------------------------------------*/
247
method ustr ustr.setlen( uint len )
{
   // Byte offset of the unit that receives the terminating zero.
   uint last = len << 1

   ( &(( this->buf )[ last ]) )->ushort = 0
   // The occupied size covers len characters plus the terminator.
   this.use = last + 2
   return this
}
255
256 /*-----------------------------------------------------------------------------
257 * Id: ustr_setlen_1 FB
258 *
259 * Summary: Recalculate the size of a unicode string to the zero character. The
260 function can be used to determine the size of a string after
261 other functions write data into it.
262 *
263 -----------------------------------------------------------------------------*/
264
method ustr ustr.setlenptr
{
   // Position reported by findsh for the value 0, searching from offset 0.
   uint zero = this.findsh( 0, 0 )

   return this.setlen( zero )
}
270
271 /*-----------------------------------------------------------------------------
272 * Id: ustr_opeq_1 FC
273 *
274 * Summary: Copy a unicode string to another unicode string.
275 *
276 * Title: ustr = ustr
277 *
278 -----------------------------------------------------------------------------*/
279
operator ustr =( ustr left, ustr right )
{
   // Both operands are treated as plain buf objects for the assignment.
   left->buf = right->buf

   return left
}
285
286 /*-----------------------------------------------------------------------------
287 * Id: ustr_str2ustr F4
288 *
289 * Summary: Converting a string to a unicode string #b('ustr( str )').
290 *
291 * Title: ustr( str )
292 *
293 * Return: The result unicode string.
294 *
295 -----------------------------------------------------------------------------*/
296
method ustr str.ustr<result>()
{
   // Assigning a str to the ustr result performs the conversion.
   result = this
}
301
302 /*-----------------------------------------------------------------------------
303 * Id: ustr_ustr2str F4
304 *
305 * Summary: Converting a unicode string to a string #b('str( ustr )').
306 *
307 * Title: str( ustr )
308 *
309 * Return: The result string.
310 *
311 -----------------------------------------------------------------------------*/
312
method str ustr.str<result>()
{
   // Assigning a ustr to the str result performs the conversion.
   result = this
}
317
318 /*-----------------------------------------------------------------------------
319 * Id: ustr_opadd F4
320 *
321 * Summary: Appending types to the unicode string. Append #b(ustr) to #b(ustr)
322 => #b( ustr += ustr ).
323 *
324 * Title: ustr += type
325 *
326 * Return: The result unicode string.
327 *
328 -----------------------------------------------------------------------------*/
329
operator ustr +=( ustr left, ustr right )
{
   // Step back over the two-byte terminator of left so the appended data,
   // which carries its own terminator, starts right after the last character.
   left.use = left.use - 2
   left->buf += right->buf

   return left
}
336
337 /*-----------------------------------------------------------------------------
338 * Id: ustr_opadd_1 FC
339 *
340 * Summary: Append #b(str) to #b(ustr) => #b( ustr += str ).
341 *
342 * Title: ustr += str
343 *
344 -----------------------------------------------------------------------------*/
345
operator ustr +=( ustr left, str right )
{
   // Convert the str operand first, then append it as a ustr.
   left += right.ustr()

   return left
}
350
351 /*-----------------------------------------------------------------------------
352 * Id: ustr_opsum F4
353 *
354 * Summary: Add two strings. Putting two unicode strings together and creating
355 a resulting unicode string.
356 *
357 * Return: The new result unicode string.
358 *
359 -----------------------------------------------------------------------------*/
360
operator ustr +<result> ( ustr left, ustr right )
{
   // Start from a copy of the left operand, then append the right one.
   result = left
   result += right
}
365
366 /*-----------------------------------------------------------------------------
367 * Id: ustr_opsum_1 FC
368 *
369 * Summary: Add a unicode string and a string.
370 *
371 * Return: The new result unicode string.
372 *
373 -----------------------------------------------------------------------------*/
374
operator ustr +<result>( ustr left, str right )
{
   // Start from a copy of the left operand, then append the converted str.
   result = left
   result += right.ustr()
}
379
380 /*-----------------------------------------------------------------------------
381 * Id: ustr_write F2
382 *
383 * Summary: Writing a unicode string to a file.
384 *
385 * Params: filename - The name of the file for writing. If the file already /
386 exists, it will be overwritten.
387 *
388 * Return: The size of the written data.
389 *
390 -----------------------------------------------------------------------------*/
391
method uint ustr.write( str filename )
{
   uint written

   // Shrink the occupied size by two bytes for the duration of the write so
   // the terminating zero is not written, then restore it.
   this->buf.use = this->buf.use - 2
   written = this->buf.write( filename )
   this->buf.use = this->buf.use + 2

   return written
}
401
402 /*-----------------------------------------------------------------------------
403 * Id: ustr_read F2
404 *
405 * Summary: Read a unicode string from a file.
406 *
407 * Params: filename - Filename.
408 *
409 * Return: The size of the read data.
410 *
411 -----------------------------------------------------------------------------*/
412
method uint ustr.read( str filename )
{
   uint loaded = this->buf.read( filename )

   // Make room for a terminating zero, store it right after the loaded
   // data and include it in the occupied size.
   this->buf.expand( 2 )
   ( &(( this->buf )[ this.use ]) )->ushort = 0
   this.use = this.use + 2

   return loaded
}
423
424 /*-----------------------------------------------------------------------------
425 * Id: ustr_toutf8 F2
426 *
427 * Summary: Convert a unicode string to UTF-8 string.
428 *
429 * Params: dest - Destination string.
430 *
431 * Return: The dest parameter.
432 *
433 -----------------------------------------------------------------------------*/
434
method str ustr.toutf8( str dest )
{
   // A source length of -1 makes the call process the zero-terminated string
   // and count the terminator; the zero-sized output only measures the result.
   uint len = WideCharToMultiByte( $CP_UTF8, 0, this.ptr(), -1, dest.ptr(),
                                   0, 0, 0 )

   // WideCharToMultiByte returns 0 on failure. Without this guard the
   // "len - 1" below would wrap around to 0xFFFFFFFF.
   if !len
   {
      dest.setlen( 0 )
      return dest
   }
   dest.reserve( len )
   WideCharToMultiByte( $CP_UTF8, 0, this.ptr(), -1, dest.ptr(), len, 0, 0 )
   // len includes the terminating zero, which is not part of the length.
   dest.setlen( len - 1 )
   return dest
}
444
445 /*-----------------------------------------------------------------------------
446 * Id: ustr_fromutf8 F2
447 *
448 * Summary: Convert a UTF-8 string to a unicode string.
449 *
450 * Params: src - Source UTF-8 string.
451 *
452 * Return: #lng/retobj#.
453 *
454 -----------------------------------------------------------------------------*/
455
method ustr ustr.fromutf8( str src )
{
   // The first call has a zero-sized output buffer, so it only reports the
   // number of wide characters required; one more is added for the zero.
   uint count = MultiByteToWideChar( $CP_UTF8, 0, src.ptr(), *src,
                                     this.ptr(), 0 ) + 1
   uint bytes = count << 1

   this.reserve( count )
   // The output size argument is a character count, not a byte count, so
   // pass count rather than bytes to avoid overstating the buffer.
   MultiByteToWideChar( $CP_UTF8, 0, src.ptr(), *src, this.ptr(), count )
   // Write the terminating zero into the last reserved unit.
   ( &(( this->buf )[ bytes - 2 ]) )->ushort = 0
   this.use = bytes
   return this
}
467
468 /*func ustr fromutf8<result>( str src )
469 {
470 result.fromutf8( src )
471 }*/
472
473 /*-----------------------------------------------------------------------------
474 * Id: ustr_substr F2
475 *
476 * Summary: Getting a unicode substring.
477 *
478 * Params: src - Initial unicode string.
479 start - Substring offset.
480 len - Substring size.
481 *
482 * Return: #lng/retobj#
483 *
484 -----------------------------------------------------------------------------*/
485
method ustr ustr.substr( ustr src, uint start, uint len )
{
   uint srclen = *src

   // Clamp the requested range to the source string so that no data past
   // its end is copied.
   if start >= srclen
   {
      start = srclen
      len = 0
   }
   else
   {
      if len > srclen - start : len = srclen - start
   }
   // Reserve one extra character: setlen stores the terminating zero right
   // after the copied data.
   this.reserve( len + 1 )
   this->buf.copy( src.ptr() + ( start << 1 ), len << 1 )
   this.setlen( len )
   return this
}
494
method ustr ustr.init()
{
   // Two bytes hold just the terminating zero of an empty string.
   this->buf.reserve( 2 )

   return this.setlen( 0 )
}
501
502 /*-----------------------------------------------------------------------------
503 * Id: ustr_findch F2
504 *
505 * Summary: Find the character in the unicode string.
506 *
507 * Params: off - The offset to start searching from.
508 symbol - Search character.
509 *
510 * Return: The offset of the character if it is found. If the character is not
511 found, the length of the string is returned.
512 *
513 -----------------------------------------------------------------------------*/
514
method uint ustr.findch( uint off, ushort symbol )
{
   // The search itself is delegated to findsh.
   uint pos = this.findsh( off, symbol )

   return pos
}
528
529 /*-----------------------------------------------------------------------------
530 * Id: ustr_findch_1 FA
531 *
532 * Summary: Find the character in the unicode string from the beginning of
533 the string.
534 *
535 * Params: symbol - Search character.
536 *
537 -----------------------------------------------------------------------------*/
538
method uint ustr.findch( ushort symbol )
{
   // Same search as the two-argument form, starting at offset 0.
   uint pos = this.findsh( 0, symbol )

   return pos
}
544
545 /*-----------------------------------------------------------------------------
546 * Id: ustr_del F2
547 *
548 * Summary: Delete a substring.
549 *
550 * Params: off - The offset of the substring being deleted.
551 len - The size of the substring being deleted.
552 *
553 * Return: #lng/retobj#
554 *
555 -----------------------------------------------------------------------------*/
556
method ustr ustr.del( uint off, uint len )
{
   uint slen = *this

   // Nothing to delete at or beyond the end of the string.
   if off >= slen : return this
   // Clamp without computing off + len, which could overflow.
   if len > slen - off : len = slen - off
   // buf.del is called with byte values, so both the offset and the length
   // are converted from characters to bytes.
   this->buf.del( off << 1, len << 1 )
   this.setlen( slen - len )

   return this
}
568
569 /*-----------------------------------------------------------------------------
570 * Id: ustr_trim F2
571 *
572 * Summary: Trimming a unicode string.
573 *
574 * Title: ustr.trim...
575 *
576 * Params: symbol - The character being deleted.
577 flag - Flags. $$[trimflags]
578 *
579 * Return: #lng/retobj#
580 *
581 -----------------------------------------------------------------------------*/
582
method ustr ustr.trim( uint symbol, uint flag )
{
   uint rsymbol = symbol   // character removed on the right side
   uint len = *this        // string length after trimming on the right
   uint skip = 0           // number of leading characters to remove
   uint ch, match

   // With $TRIM_PAIR an opening bracket on the left is paired with the
   // matching closing bracket on the right.
   if flag & $TRIM_PAIR
   {
      switch symbol
      {
         case '(' : rsymbol = ')'
         case '{' : rsymbol = '}'
         case '[' : rsymbol = ']'
         case '<' : rsymbol = '>'
      }
   }
   if flag & $TRIM_RIGHT
   {
      // Walk back from the end; "while len" keeps the index inside the
      // string even when it is empty or consists only of trimmed characters.
      while len
      {
         ch = this[ len - 1 ]
         if flag & $TRIM_SYS : match = ( ch <= 0x0020 )
         else : match = ( ch == rsymbol )
         if !match : break
         len--
         if flag & $TRIM_ONE : break
      }
      if len != *this : this.setlen( len )
   }
   if flag & $TRIM_LEFT
   {
      // Count the leading characters to drop and remove them in one call.
      while skip < len
      {
         ch = this[ skip ]
         if flag & $TRIM_SYS : match = ( ch <= 0x0020 )
         else : match = ( ch == symbol )
         if !match : break
         skip++
         if flag & $TRIM_ONE : break
      }
      if skip : this.del( 0, skip )
   }

   return this
}
654
655 /*-----------------------------------------------------------------------------
656 * Id: ustr_trim_1 FB
657 *
658 * Summary: Deleting spaces on the right.
659 *
660 -----------------------------------------------------------------------------*/
661
method ustr ustr.trimrspace()
{
   // Remove the ' ' character from the right side only.
   uint mode = $TRIM_RIGHT

   return this.trim( ' ', mode )
}
666
667 /*-----------------------------------------------------------------------------
668 * Id: ustr_trim_2 FB
669 *
670 * Summary: Deleting spaces on the both sides.
671 *
672 -----------------------------------------------------------------------------*/
673
method ustr ustr.trimspace()
{
   // Remove the ' ' character from both sides.
   uint mode = $TRIM_RIGHT | $TRIM_LEFT

   return this.trim( ' ', mode )
}
678
679 /*-----------------------------------------------------------------------------
680 * Id: ustr_copy F2
681 *
682 * Summary: Copying. The method copies the specified size of the data into
683 a unicode string.
684 *
685 * Params: ptr - The pointer to the data being copied. If data does not end in /
686 a zero, it will be added automatically.
687 size - The size of the data being copied.
688 *
689 * Return: #lng/retobj#
690 *
691 -----------------------------------------------------------------------------*/
692
method ustr ustr.copy( uint ptr, uint size )
{
   // size is given in ushort units; buf.copy is called with a byte count.
   uint bytes = size << 1

   this->buf.copy( ptr, bytes )
   // setlen stores the terminating zero after the copied data.
   return this.setlen( size )
}
699
700 /*-----------------------------------------------------------------------------
701 * Id: ustr_copy_1 FB
702 *
703 * Summary: The method copies data into a unicode string.
704 *
705 * Params: ptr - The pointer to the data being copied. All data to the zero /
706 ushort will be copied.
707 *
708 * Return: #lng/retobj#
709 *
710 -----------------------------------------------------------------------------*/
711
method ustr ustr.copy( uint ptr )
{
   // mlensh supplies the number of units to copy from ptr.
   uint units = mlensh( ptr )

   return this.copy( ptr, units )
}
717
718 /*-----------------------------------------------------------------------------
719 * Id: ustr_replace F2
720 *
721 * Summary: Replacing in a unicode string. The method replaces data in
722 a unicode string.
723 *
724 * Params: offset - The offset of the data being replaced.
725 size - The size of the data being replaced.
726 value - The unicode string being inserted.
727 *
728 * Return: #lng/retobj#
729 *
730 -----------------------------------------------------------------------------*/
731
method ustr ustr.replace( uint offset, uint size, ustr value )
{
   uint slen = *this

   // An offset at or beyond the end of the string means a plain append.
   if offset >= slen : this += value
   else
   {
      // Keep the replaced range inside the string so that the terminating
      // zero is never part of the data being replaced.
      if size > slen - offset : size = slen - offset
      // Temporarily hide the terminator of value so that only its
      // characters are inserted, then restore it.
      value->buf.use -= 2
      this->buf.replace( offset << 1, size << 1, value->buf )
      value->buf.use += 2
   }
   return this
}
743
744 /*-----------------------------------------------------------------------------
745 * Id: ustr_insert F2
746 *
747 * Summary: Insertion. The method inserts one unicode string into another.
748 *
749 * Params: offset - The offset where string will be inserted.
750 value - The unicode string being inserted.
751 *
752 * Return: #lng/retobj#
753 *
754 -----------------------------------------------------------------------------*/
755
method ustr ustr.insert( uint offset, ustr value )
{
   // An insertion is a replacement of a zero-sized range.
   uint replaced = 0

   return this.replace( offset, replaced, value )
}
760
761 /*-----------------------------------------------------------------------------
762 * Id: ustr_opeqeq F4
763 *
764 * Summary: Comparison operation.
765 *
766 * Return: Returns #b(1) if the strings are equal. Otherwise, it returns #b(0).
767 *
768 -----------------------------------------------------------------------------*/
769
operator uint ==( str left, ustr right )
{
   // Convert the str operand so that two ustr values are compared.
   uint equal = ( left.ustr() == right )

   return equal
}
774
775 /*-----------------------------------------------------------------------------
776 * Id: ustr_opeqeq_1 FC
777 *
778 * Summary: Comparison operation.
779 *
780 * Return: Returns #b(1) if the strings are equal. Otherwise, it returns #b(0).
781 *
782 -----------------------------------------------------------------------------*/
783
operator uint ==( ustr left, str right )
{
   // Convert the str operand so that two ustr values are compared.
   uint equal = ( left == right.ustr() )

   return equal
}
788
789 /*-----------------------------------------------------------------------------
790 ** Id: ustr_clear F3
791 *
792 * Summary: Clearing a unicode string.
793 *
794 * Return: #lng/retobj#
795 *
796 -----------------------------------------------------------------------------*/
797
method ustr ustr.clear
{
   // An empty string has length 0; setlen stores the terminator and
   // returns this object.
   uint empty = 0

   return this.setlen( empty )
}