|
Ruby
1.9.3p448(2013-06-27revision41675)
|
00001 /********************************************************************** 00002 regcomp.c - Oniguruma (regular expression library) 00003 **********************************************************************/ 00004 /*- 00005 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 00006 * All rights reserved. 00007 * 00008 * Redistribution and use in source and binary forms, with or without 00009 * modification, are permitted provided that the following conditions 00010 * are met: 00011 * 1. Redistributions of source code must retain the above copyright 00012 * notice, this list of conditions and the following disclaimer. 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in the 00015 * documentation and/or other materials provided with the distribution. 00016 * 00017 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 00018 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00019 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00020 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 00021 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00022 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00023 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00024 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00025 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00026 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00027 * SUCH DAMAGE. 00028 */ 00029 00030 #include "regparse.h" 00031 00032 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; 00033 00034 extern OnigCaseFoldType 00035 onig_get_default_case_fold_flag(void) 00036 { 00037 return OnigDefaultCaseFoldFlag; 00038 } 00039 00040 extern int 00041 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) 00042 { 00043 OnigDefaultCaseFoldFlag = case_fold_flag; 00044 return 0; 00045 } 00046 00047 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 00048 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; 00049 #endif 00050 00051 static UChar* 00052 str_dup(UChar* s, UChar* end) 00053 { 00054 ptrdiff_t len = end - s; 00055 00056 if (len > 0) { 00057 UChar* r = (UChar* )xmalloc(len + 1); 00058 CHECK_NULL_RETURN(r); 00059 xmemcpy(r, s, len); 00060 r[len] = (UChar )0; 00061 return r; 00062 } 00063 else return NULL; 00064 } 00065 00066 static void 00067 swap_node(Node* a, Node* b) 00068 { 00069 Node c; 00070 c = *a; *a = *b; *b = c; 00071 00072 if (NTYPE(a) == NT_STR) { 00073 StrNode* sn = NSTR(a); 00074 if (sn->capa == 0) { 00075 size_t len = sn->end - sn->s; 00076 sn->s = sn->buf; 00077 sn->end = sn->s + len; 00078 } 00079 } 00080 00081 if (NTYPE(b) == NT_STR) { 00082 StrNode* sn = NSTR(b); 00083 if (sn->capa == 0) { 00084 size_t len = sn->end - sn->s; 00085 sn->s = sn->buf; 00086 sn->end = sn->s + len; 00087 } 00088 } 00089 } 00090 00091 static OnigDistance 00092 distance_add(OnigDistance d1, OnigDistance d2) 00093 { 00094 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) 00095 return ONIG_INFINITE_DISTANCE; 00096 else { 00097 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; 00098 else return ONIG_INFINITE_DISTANCE; 00099 } 00100 } 00101 00102 static OnigDistance 00103 distance_multiply(OnigDistance d, int m) 00104 { 00105 if (m == 0) return 0; 00106 00107 if (d < ONIG_INFINITE_DISTANCE / m) 00108 return d * m; 00109 else 00110 return ONIG_INFINITE_DISTANCE; 00111 } 00112 00113 static int 00114 bitset_is_empty(BitSetRef bs) 00115 { 00116 int i; 00117 for (i = 0; i < (int )BITSET_SIZE; i++) { 00118 if (bs[i] != 0) return 0; 00119 } 00120 return 1; 00121 } 00122 00123 #ifdef ONIG_DEBUG 00124 static int 00125 onig_is_prelude(void) 00126 { 00127 return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE")); 00128 } 00129 00130 static int 00131 bitset_on_num(BitSetRef bs) 00132 { 00133 int i, n; 00134 00135 n = 0; 00136 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 00137 if (BITSET_AT(bs, i)) n++; 00138 } 00139 return n; 00140 } 00141 #endif 00142 00143 extern int 00144 onig_bbuf_init(BBuf* buf, OnigDistance size) 00145 { 00146 if (size <= 0) { 00147 size = 0; 00148 buf->p = NULL; 00149 } 00150 else { 00151 buf->p = (UChar* )xmalloc(size); 00152 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); 00153 } 00154 00155 buf->alloc = (unsigned int)size; 00156 buf->used = 0; 00157 return 0; 00158 } 00159 00160 00161 #ifdef USE_SUBEXP_CALL 00162 00163 static int 00164 unset_addr_list_init(UnsetAddrList* uslist, int size) 00165 { 00166 UnsetAddr* p; 00167 00168 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); 00169 CHECK_NULL_RETURN_MEMERR(p); 00170 uslist->num = 0; 00171 uslist->alloc = size; 00172 uslist->us = p; 00173 return 0; 00174 } 00175 00176 static void 00177 unset_addr_list_end(UnsetAddrList* uslist) 00178 { 00179 if (IS_NOT_NULL(uslist->us)) 00180 xfree(uslist->us); 00181 } 00182 00183 static int 00184 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) 00185 { 00186 UnsetAddr* p; 00187 int size; 00188 00189 if (uslist->num >= uslist->alloc) { 00190 size = uslist->alloc * 2; 00191 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); 00192 CHECK_NULL_RETURN_MEMERR(p); 00193 uslist->alloc = size; 00194 uslist->us = p; 00195 } 00196 00197 uslist->us[uslist->num].offset = offset; 00198 uslist->us[uslist->num].target = node; 00199 uslist->num++; 00200 return 0; 00201 } 00202 #endif /* USE_SUBEXP_CALL */ 00203 00204 00205 static int 00206 add_opcode(regex_t* reg, int opcode) 00207 { 00208 BBUF_ADD1(reg, opcode); 00209 return 0; 00210 } 00211 00212 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00213 static int 00214 add_state_check_num(regex_t* reg, int num) 00215 { 00216 StateCheckNumType n = (StateCheckNumType )num; 00217 00218 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); 00219 return 0; 00220 } 00221 #endif 00222 00223 static int 00224 add_rel_addr(regex_t* reg, int addr) 00225 { 00226 RelAddrType ra = (RelAddrType )addr; 00227 00228 BBUF_ADD(reg, &ra, SIZE_RELADDR); 00229 return 0; 00230 } 00231 00232 static int 00233 add_abs_addr(regex_t* reg, int addr) 00234 { 00235 AbsAddrType ra = (AbsAddrType )addr; 00236 00237 BBUF_ADD(reg, &ra, SIZE_ABSADDR); 00238 return 0; 00239 } 00240 00241 static int 00242 add_length(regex_t* reg, OnigDistance len) 00243 { 00244 LengthType l = (LengthType )len; 00245 00246 BBUF_ADD(reg, &l, SIZE_LENGTH); 00247 return 0; 00248 } 00249 00250 static int 00251 add_mem_num(regex_t* reg, int num) 00252 { 00253 MemNumType n = (MemNumType )num; 00254 00255 BBUF_ADD(reg, &n, SIZE_MEMNUM); 00256 return 0; 00257 } 00258 00259 static int 00260 add_pointer(regex_t* reg, void* addr) 00261 { 00262 PointerType ptr = (PointerType )addr; 00263 00264 BBUF_ADD(reg, &ptr, SIZE_POINTER); 00265 return 0; 00266 } 00267 00268 static int 00269 add_option(regex_t* reg, OnigOptionType option) 00270 { 00271 BBUF_ADD(reg, &option, SIZE_OPTION); 00272 return 0; 00273 } 00274 00275 static int 00276 add_opcode_rel_addr(regex_t* reg, int opcode, int addr) 00277 { 00278 int r; 00279 00280 r = add_opcode(reg, opcode); 00281 if (r) return r; 00282 r = add_rel_addr(reg, addr); 00283 return r; 00284 } 00285 00286 static int 00287 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len) 00288 { 00289 BBUF_ADD(reg, bytes, len); 00290 return 0; 00291 } 00292 00293 static int 00294 add_bitset(regex_t* reg, BitSetRef bs) 00295 { 00296 BBUF_ADD(reg, bs, SIZE_BITSET); 00297 return 0; 00298 } 00299 00300 static int 00301 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) 00302 { 00303 int r; 00304 00305 r = add_opcode(reg, opcode); 00306 if (r) return r; 00307 r = add_option(reg, option); 00308 return r; 00309 } 00310 00311 static int compile_length_tree(Node* node, regex_t* reg); 00312 static int compile_tree(Node* node, regex_t* reg); 00313 00314 00315 #define IS_NEED_STR_LEN_OP_EXACT(op) \ 00316 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\ 00317 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) 00318 00319 static int 00320 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case) 00321 { 00322 int op; 00323 00324 if (ignore_case) { 00325 switch (str_len) { 00326 case 1: op = OP_EXACT1_IC; break; 00327 default: op = OP_EXACTN_IC; break; 00328 } 00329 } 00330 else { 00331 switch (mb_len) { 00332 case 1: 00333 switch (str_len) { 00334 case 1: op = OP_EXACT1; break; 00335 case 2: op = OP_EXACT2; break; 00336 case 3: op = OP_EXACT3; break; 00337 case 4: op = OP_EXACT4; break; 00338 case 5: op = OP_EXACT5; break; 00339 default: op = OP_EXACTN; break; 00340 } 00341 break; 00342 00343 case 2: 00344 switch (str_len) { 00345 case 1: op = OP_EXACTMB2N1; break; 00346 case 2: op = OP_EXACTMB2N2; break; 00347 case 3: op = OP_EXACTMB2N3; break; 00348 default: op = OP_EXACTMB2N; break; 00349 } 00350 break; 00351 00352 case 3: 00353 op = OP_EXACTMB3N; 00354 break; 00355 00356 default: 00357 op = OP_EXACTMBN; 00358 break; 00359 } 00360 } 00361 return op; 00362 } 00363 00364 static int 00365 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) 00366 { 00367 int r; 00368 int saved_num_null_check = reg->num_null_check; 00369 00370 if (empty_info != 0) { 00371 r = add_opcode(reg, OP_NULL_CHECK_START); 00372 if (r) return r; 00373 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ 00374 if (r) return r; 00375 reg->num_null_check++; 00376 } 00377 00378 r = compile_tree(node, reg); 00379 if (r) return r; 00380 00381 if (empty_info != 0) { 00382 if (empty_info == NQ_TARGET_IS_EMPTY) 00383 r = add_opcode(reg, OP_NULL_CHECK_END); 00384 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) 00385 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); 00386 else if (empty_info == NQ_TARGET_IS_EMPTY_REC) 00387 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); 00388 00389 if (r) return r; 00390 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ 00391 } 00392 return r; 00393 } 00394 00395 #ifdef USE_SUBEXP_CALL 00396 static int 00397 compile_call(CallNode* node, regex_t* reg) 00398 { 00399 int r; 00400 00401 r = add_opcode(reg, OP_CALL); 00402 if (r) return r; 00403 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), 00404 node->target); 00405 if (r) return r; 00406 r = add_abs_addr(reg, 0 /*dummy addr.*/); 00407 return r; 00408 } 00409 #endif 00410 00411 static int 00412 compile_tree_n_times(Node* node, int n, regex_t* reg) 00413 { 00414 int i, r; 00415 00416 for (i = 0; i < n; i++) { 00417 r = compile_tree(node, reg); 00418 if (r) return r; 00419 } 00420 return 0; 00421 } 00422 00423 static int 00424 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len, 00425 regex_t* reg ARG_UNUSED, int ignore_case) 00426 { 00427 int len; 00428 int op = select_str_opcode(mb_len, str_len, ignore_case); 00429 00430 len = SIZE_OPCODE; 00431 00432 if (op == OP_EXACTMBN) len += SIZE_LENGTH; 00433 if (IS_NEED_STR_LEN_OP_EXACT(op)) 00434 len += SIZE_LENGTH; 00435 00436 len += mb_len * str_len; 00437 return len; 00438 } 00439 00440 static int 00441 add_compile_string(UChar* s, int mb_len, OnigDistance str_len, 00442 regex_t* reg, int ignore_case) 00443 { 00444 int op = select_str_opcode(mb_len, str_len, ignore_case); 00445 add_opcode(reg, op); 00446 00447 if (op == OP_EXACTMBN) 00448 add_length(reg, mb_len); 00449 00450 if (IS_NEED_STR_LEN_OP_EXACT(op)) { 00451 if (op == OP_EXACTN_IC) 00452 add_length(reg, mb_len * str_len); 00453 else 00454 add_length(reg, str_len); 00455 } 00456 00457 add_bytes(reg, s, mb_len * str_len); 00458 return 0; 00459 } 00460 00461 00462 static int 00463 compile_length_string_node(Node* node, regex_t* reg) 00464 { 00465 int rlen, r, len, prev_len, slen, ambig; 00466 OnigEncoding enc = reg->enc; 00467 UChar *p, *prev; 00468 StrNode* sn; 00469 00470 sn = NSTR(node); 00471 if (sn->end <= sn->s) 00472 return 0; 00473 00474 ambig = NSTRING_IS_AMBIG(node); 00475 00476 p = prev = sn->s; 00477 prev_len = enclen(enc, p, sn->end); 00478 p += prev_len; 00479 slen = 1; 00480 rlen = 0; 00481 00482 for (; p < sn->end; ) { 00483 len = enclen(enc, p, sn->end); 00484 if (len == prev_len) { 00485 slen++; 00486 } 00487 else { 00488 r = add_compile_string_length(prev, prev_len, slen, reg, ambig); 00489 rlen += r; 00490 prev = p; 00491 slen = 1; 00492 prev_len = len; 00493 } 00494 p += len; 00495 } 00496 r = add_compile_string_length(prev, prev_len, slen, reg, ambig); 00497 rlen += r; 00498 return rlen; 00499 } 00500 00501 static int 00502 compile_length_string_raw_node(StrNode* sn, regex_t* reg) 00503 { 00504 if (sn->end <= sn->s) 00505 return 0; 00506 00507 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); 00508 } 00509 00510 static int 00511 compile_string_node(Node* node, regex_t* reg) 00512 { 00513 int r, len, prev_len, slen, ambig; 00514 OnigEncoding enc = reg->enc; 00515 UChar *p, *prev, *end; 00516 StrNode* sn; 00517 00518 sn = NSTR(node); 00519 if (sn->end <= sn->s) 00520 return 0; 00521 00522 end = sn->end; 00523 ambig = NSTRING_IS_AMBIG(node); 00524 00525 p = prev = sn->s; 00526 prev_len = enclen(enc, p, end); 00527 p += prev_len; 00528 slen = 1; 00529 00530 for (; p < end; ) { 00531 len = enclen(enc, p, end); 00532 if (len == prev_len) { 00533 slen++; 00534 } 00535 else { 00536 r = add_compile_string(prev, prev_len, slen, reg, ambig); 00537 if (r) return r; 00538 00539 prev = p; 00540 slen = 1; 00541 prev_len = len; 00542 } 00543 00544 p += len; 00545 } 00546 return add_compile_string(prev, prev_len, slen, reg, ambig); 00547 } 00548 00549 static int 00550 compile_string_raw_node(StrNode* sn, regex_t* reg) 00551 { 00552 if (sn->end <= sn->s) 00553 return 0; 00554 00555 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); 00556 } 00557 00558 static int 00559 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) 00560 { 00561 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 00562 add_length(reg, mbuf->used); 00563 return add_bytes(reg, mbuf->p, mbuf->used); 00564 #else 00565 int r, pad_size; 00566 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; 00567 00568 GET_ALIGNMENT_PAD_SIZE(p, pad_size); 00569 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); 00570 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); 00571 00572 r = add_bytes(reg, mbuf->p, mbuf->used); 00573 00574 /* padding for return value from compile_length_cclass_node() to be fix. */ 00575 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; 00576 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); 00577 return r; 00578 #endif 00579 } 00580 00581 static int 00582 compile_length_cclass_node(CClassNode* cc, regex_t* reg) 00583 { 00584 int len; 00585 00586 if (IS_NCCLASS_SHARE(cc)) { 00587 len = SIZE_OPCODE + SIZE_POINTER; 00588 return len; 00589 } 00590 00591 if (IS_NULL(cc->mbuf)) { 00592 len = SIZE_OPCODE + SIZE_BITSET; 00593 } 00594 else { 00595 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { 00596 len = SIZE_OPCODE; 00597 } 00598 else { 00599 len = SIZE_OPCODE + SIZE_BITSET; 00600 } 00601 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 00602 len += SIZE_LENGTH + cc->mbuf->used; 00603 #else 00604 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); 00605 #endif 00606 } 00607 00608 return len; 00609 } 00610 00611 static int 00612 compile_cclass_node(CClassNode* cc, regex_t* reg) 00613 { 00614 int r; 00615 00616 if (IS_NCCLASS_SHARE(cc)) { 00617 add_opcode(reg, OP_CCLASS_NODE); 00618 r = add_pointer(reg, cc); 00619 return r; 00620 } 00621 00622 if (IS_NULL(cc->mbuf)) { 00623 if (IS_NCCLASS_NOT(cc)) 00624 add_opcode(reg, OP_CCLASS_NOT); 00625 else 00626 add_opcode(reg, OP_CCLASS); 00627 00628 r = add_bitset(reg, cc->bs); 00629 } 00630 else { 00631 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { 00632 if (IS_NCCLASS_NOT(cc)) 00633 add_opcode(reg, OP_CCLASS_MB_NOT); 00634 else 00635 add_opcode(reg, OP_CCLASS_MB); 00636 00637 r = add_multi_byte_cclass(cc->mbuf, reg); 00638 } 00639 else { 00640 if (IS_NCCLASS_NOT(cc)) 00641 add_opcode(reg, OP_CCLASS_MIX_NOT); 00642 else 00643 add_opcode(reg, OP_CCLASS_MIX); 00644 00645 r = add_bitset(reg, cc->bs); 00646 if (r) return r; 00647 r = add_multi_byte_cclass(cc->mbuf, reg); 00648 } 00649 } 00650 00651 return r; 00652 } 00653 00654 static int 00655 entry_repeat_range(regex_t* reg, int id, int lower, int upper) 00656 { 00657 #define REPEAT_RANGE_ALLOC 4 00658 00659 OnigRepeatRange* p; 00660 00661 if (reg->repeat_range_alloc == 0) { 00662 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); 00663 CHECK_NULL_RETURN_MEMERR(p); 00664 reg->repeat_range = p; 00665 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; 00666 } 00667 else if (reg->repeat_range_alloc <= id) { 00668 int n; 00669 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; 00670 p = (OnigRepeatRange* )xrealloc(reg->repeat_range, 00671 sizeof(OnigRepeatRange) * n); 00672 CHECK_NULL_RETURN_MEMERR(p); 00673 reg->repeat_range = p; 00674 reg->repeat_range_alloc = n; 00675 } 00676 else { 00677 p = reg->repeat_range; 00678 } 00679 00680 p[id].lower = lower; 00681 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); 00682 return 0; 00683 } 00684 00685 static int 00686 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, 00687 regex_t* reg) 00688 { 00689 int r; 00690 int num_repeat = reg->num_repeat; 00691 00692 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); 00693 if (r) return r; 00694 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ 00695 reg->num_repeat++; 00696 if (r) return r; 00697 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); 00698 if (r) return r; 00699 00700 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); 00701 if (r) return r; 00702 00703 r = compile_tree_empty_check(qn->target, reg, empty_info); 00704 if (r) return r; 00705 00706 if ( 00707 #ifdef USE_SUBEXP_CALL 00708 reg->num_call > 0 || 00709 #endif 00710 IS_QUANTIFIER_IN_REPEAT(qn)) { 00711 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); 00712 } 00713 else { 00714 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); 00715 } 00716 if (r) return r; 00717 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ 00718 return r; 00719 } 00720 00721 static int 00722 is_anychar_star_quantifier(QtfrNode* qn) 00723 { 00724 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && 00725 NTYPE(qn->target) == NT_CANY) 00726 return 1; 00727 else 00728 return 0; 00729 } 00730 00731 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50 00732 #define CKN_ON (ckn > 0) 00733 00734 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00735 00736 static int 00737 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) 00738 { 00739 int len, mod_tlen, cklen; 00740 int ckn; 00741 int infinite = IS_REPEAT_INFINITE(qn->upper); 00742 int empty_info = qn->target_empty_info; 00743 int tlen = compile_length_tree(qn->target, reg); 00744 00745 if (tlen < 0) return tlen; 00746 00747 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); 00748 00749 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); 00750 00751 /* anychar repeat */ 00752 if (NTYPE(qn->target) == NT_CANY) { 00753 if (qn->greedy && infinite) { 00754 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) 00755 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; 00756 else 00757 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; 00758 } 00759 } 00760 00761 if (empty_info != 0) 00762 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 00763 else 00764 mod_tlen = tlen; 00765 00766 if (infinite && qn->lower <= 1) { 00767 if (qn->greedy) { 00768 if (qn->lower == 1) 00769 len = SIZE_OP_JUMP; 00770 else 00771 len = 0; 00772 00773 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; 00774 } 00775 else { 00776 if (qn->lower == 0) 00777 len = SIZE_OP_JUMP; 00778 else 00779 len = 0; 00780 00781 len += mod_tlen + SIZE_OP_PUSH + cklen; 00782 } 00783 } 00784 else if (qn->upper == 0) { 00785 if (qn->is_refered != 0) /* /(?<n>..){0}/ */ 00786 len = SIZE_OP_JUMP + tlen; 00787 else 00788 len = 0; 00789 } 00790 else if (qn->upper == 1 && qn->greedy) { 00791 if (qn->lower == 0) { 00792 if (CKN_ON) { 00793 len = SIZE_OP_STATE_CHECK_PUSH + tlen; 00794 } 00795 else { 00796 len = SIZE_OP_PUSH + tlen; 00797 } 00798 } 00799 else { 00800 len = tlen; 00801 } 00802 } 00803 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 00804 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; 00805 } 00806 else { 00807 len = SIZE_OP_REPEAT_INC 00808 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; 00809 if (CKN_ON) 00810 len += SIZE_OP_STATE_CHECK; 00811 } 00812 00813 return len; 00814 } 00815 00816 static int 00817 compile_quantifier_node(QtfrNode* qn, regex_t* reg) 00818 { 00819 int r, mod_tlen; 00820 int ckn; 00821 int infinite = IS_REPEAT_INFINITE(qn->upper); 00822 int empty_info = qn->target_empty_info; 00823 int tlen = compile_length_tree(qn->target, reg); 00824 00825 if (tlen < 0) return tlen; 00826 00827 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); 00828 00829 if (is_anychar_star_quantifier(qn)) { 00830 r = compile_tree_n_times(qn->target, qn->lower, reg); 00831 if (r) return r; 00832 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { 00833 if (IS_MULTILINE(reg->options)) 00834 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); 00835 else 00836 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); 00837 if (r) return r; 00838 if (CKN_ON) { 00839 r = add_state_check_num(reg, ckn); 00840 if (r) return r; 00841 } 00842 00843 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 00844 } 00845 else { 00846 if (IS_MULTILINE(reg->options)) { 00847 r = add_opcode(reg, (CKN_ON ? 00848 OP_STATE_CHECK_ANYCHAR_ML_STAR 00849 : OP_ANYCHAR_ML_STAR)); 00850 } 00851 else { 00852 r = add_opcode(reg, (CKN_ON ? 00853 OP_STATE_CHECK_ANYCHAR_STAR 00854 : OP_ANYCHAR_STAR)); 00855 } 00856 if (r) return r; 00857 if (CKN_ON) 00858 r = add_state_check_num(reg, ckn); 00859 00860 return r; 00861 } 00862 } 00863 00864 if (empty_info != 0) 00865 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 00866 else 00867 mod_tlen = tlen; 00868 00869 if (infinite && qn->lower <= 1) { 00870 if (qn->greedy) { 00871 if (qn->lower == 1) { 00872 r = add_opcode_rel_addr(reg, OP_JUMP, 00873 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); 00874 if (r) return r; 00875 } 00876 00877 if (CKN_ON) { 00878 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 00879 if (r) return r; 00880 r = add_state_check_num(reg, ckn); 00881 if (r) return r; 00882 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); 00883 } 00884 else { 00885 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); 00886 } 00887 if (r) return r; 00888 r = compile_tree_empty_check(qn->target, reg, empty_info); 00889 if (r) return r; 00890 r = add_opcode_rel_addr(reg, OP_JUMP, 00891 -(mod_tlen + (int )SIZE_OP_JUMP 00892 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); 00893 } 00894 else { 00895 if (qn->lower == 0) { 00896 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); 00897 if (r) return r; 00898 } 00899 r = compile_tree_empty_check(qn->target, reg, empty_info); 00900 if (r) return r; 00901 if (CKN_ON) { 00902 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); 00903 if (r) return r; 00904 r = add_state_check_num(reg, ckn); 00905 if (r) return r; 00906 r = add_rel_addr(reg, 00907 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); 00908 } 00909 else 00910 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); 00911 } 00912 } 00913 else if (qn->upper == 0) { 00914 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ 00915 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 00916 if (r) return r; 00917 r = compile_tree(qn->target, reg); 00918 } 00919 else 00920 r = 0; 00921 } 00922 else if (qn->upper == 1 && qn->greedy) { 00923 if (qn->lower == 0) { 00924 if (CKN_ON) { 00925 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 00926 if (r) return r; 00927 r = add_state_check_num(reg, ckn); 00928 if (r) return r; 00929 r = add_rel_addr(reg, tlen); 00930 } 00931 else { 00932 r = add_opcode_rel_addr(reg, OP_PUSH, tlen); 00933 } 00934 if (r) return r; 00935 } 00936 00937 r = compile_tree(qn->target, reg); 00938 } 00939 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 00940 if (CKN_ON) { 00941 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 00942 if (r) return r; 00943 r = add_state_check_num(reg, ckn); 00944 if (r) return r; 00945 r = add_rel_addr(reg, SIZE_OP_JUMP); 00946 } 00947 else { 00948 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); 00949 } 00950 00951 if (r) return r; 00952 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 00953 if (r) return r; 00954 r = compile_tree(qn->target, reg); 00955 } 00956 else { 00957 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); 00958 if (CKN_ON) { 00959 if (r) return r; 00960 r = add_opcode(reg, OP_STATE_CHECK); 00961 if (r) return r; 00962 r = add_state_check_num(reg, ckn); 00963 } 00964 } 00965 return r; 00966 } 00967 00968 #else /* USE_COMBINATION_EXPLOSION_CHECK */ 00969 00970 static int 00971 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) 00972 { 00973 int len, mod_tlen; 00974 int infinite = IS_REPEAT_INFINITE(qn->upper); 00975 int empty_info = qn->target_empty_info; 00976 int tlen = compile_length_tree(qn->target, reg); 00977 00978 if (tlen < 0) return tlen; 00979 00980 /* anychar repeat */ 00981 if (NTYPE(qn->target) == NT_CANY) { 00982 if (qn->greedy && infinite) { 00983 if (IS_NOT_NULL(qn->next_head_exact)) 00984 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; 00985 else 00986 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; 00987 } 00988 } 00989 00990 if (empty_info != 0) 00991 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 00992 else 00993 mod_tlen = tlen; 00994 00995 if (infinite && 00996 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 00997 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { 00998 len = SIZE_OP_JUMP; 00999 } 01000 else { 01001 len = tlen * qn->lower; 01002 } 01003 01004 if (qn->greedy) { 01005 if (IS_NOT_NULL(qn->head_exact)) 01006 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; 01007 else if (IS_NOT_NULL(qn->next_head_exact)) 01008 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; 01009 else 01010 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; 01011 } 01012 else 01013 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; 01014 } 01015 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ 01016 len = SIZE_OP_JUMP + tlen; 01017 } 01018 else if (!infinite && qn->greedy && 01019 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper 01020 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01021 len = tlen * qn->lower; 01022 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); 01023 } 01024 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 01025 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; 01026 } 01027 else { 01028 len = SIZE_OP_REPEAT_INC 01029 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; 01030 } 01031 01032 return len; 01033 } 01034 01035 static int 01036 compile_quantifier_node(QtfrNode* qn, regex_t* reg) 01037 { 01038 int i, r, mod_tlen; 01039 int infinite = IS_REPEAT_INFINITE(qn->upper); 01040 int empty_info = qn->target_empty_info; 01041 int tlen = compile_length_tree(qn->target, reg); 01042 01043 if (tlen < 0) return tlen; 01044 01045 if (is_anychar_star_quantifier(qn)) { 01046 r = compile_tree_n_times(qn->target, qn->lower, reg); 01047 if (r) return r; 01048 if (IS_NOT_NULL(qn->next_head_exact)) { 01049 if (IS_MULTILINE(reg->options)) 01050 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); 01051 else 01052 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); 01053 if (r) return r; 01054 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 01055 } 01056 else { 01057 if (IS_MULTILINE(reg->options)) 01058 return add_opcode(reg, OP_ANYCHAR_ML_STAR); 01059 else 01060 return add_opcode(reg, OP_ANYCHAR_STAR); 01061 } 01062 } 01063 01064 if (empty_info != 0) 01065 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 01066 else 01067 mod_tlen = tlen; 01068 01069 if (infinite && 01070 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01071 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { 01072 if (qn->greedy) { 01073 if (IS_NOT_NULL(qn->head_exact)) 01074 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); 01075 else if (IS_NOT_NULL(qn->next_head_exact)) 01076 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); 01077 else 01078 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); 01079 } 01080 else { 01081 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); 01082 } 01083 if (r) return r; 01084 } 01085 else { 01086 r = compile_tree_n_times(qn->target, qn->lower, reg); 01087 if (r) return r; 01088 } 01089 01090 if (qn->greedy) { 01091 if (IS_NOT_NULL(qn->head_exact)) { 01092 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, 01093 mod_tlen + SIZE_OP_JUMP); 01094 if (r) return r; 01095 add_bytes(reg, NSTR(qn->head_exact)->s, 1); 01096 r = compile_tree_empty_check(qn->target, reg, empty_info); 01097 if (r) return r; 01098 r = add_opcode_rel_addr(reg, OP_JUMP, 01099 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); 01100 } 01101 else if (IS_NOT_NULL(qn->next_head_exact)) { 01102 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, 01103 mod_tlen + SIZE_OP_JUMP); 01104 if (r) return r; 01105 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 01106 r = compile_tree_empty_check(qn->target, reg, empty_info); 01107 if (r) return r; 01108 r = add_opcode_rel_addr(reg, OP_JUMP, 01109 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); 01110 } 01111 else { 01112 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); 01113 if (r) return r; 01114 r = compile_tree_empty_check(qn->target, reg, empty_info); 01115 if (r) return r; 01116 r = add_opcode_rel_addr(reg, OP_JUMP, 01117 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); 01118 } 01119 } 01120 else { 01121 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); 01122 if (r) return r; 01123 r = compile_tree_empty_check(qn->target, reg, empty_info); 01124 if (r) return r; 01125 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); 01126 } 01127 } 01128 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ 01129 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 01130 if (r) return r; 01131 r = compile_tree(qn->target, reg); 01132 } 01133 else if (!infinite && qn->greedy && 01134 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper 01135 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01136 int n = qn->upper - qn->lower; 01137 01138 r = compile_tree_n_times(qn->target, qn->lower, reg); 01139 if (r) return r; 01140 01141 for (i = 0; i < n; i++) { 01142 r = add_opcode_rel_addr(reg, OP_PUSH, 01143 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); 01144 if (r) return r; 01145 r = compile_tree(qn->target, reg); 01146 if (r) return r; 01147 } 01148 } 01149 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 01150 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); 01151 if (r) return r; 01152 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 01153 if (r) return r; 01154 r = compile_tree(qn->target, reg); 01155 } 01156 else { 01157 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); 01158 } 01159 return r; 01160 } 01161 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 01162 01163 static int 01164 compile_length_option_node(EncloseNode* node, regex_t* reg) 01165 { 01166 int tlen; 01167 OnigOptionType prev = reg->options; 01168 01169 reg->options = node->option; 01170 tlen = compile_length_tree(node->target, reg); 01171 reg->options = prev; 01172 01173 if (tlen < 0) return tlen; 01174 01175 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 01176 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL 01177 + tlen + SIZE_OP_SET_OPTION; 01178 } 01179 else 01180 return tlen; 01181 } 01182 01183 static int 01184 compile_option_node(EncloseNode* node, regex_t* reg) 01185 { 01186 int r; 01187 OnigOptionType prev = reg->options; 01188 01189 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 01190 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); 01191 if (r) return r; 01192 r = add_opcode_option(reg, OP_SET_OPTION, prev); 01193 if (r) return r; 01194 r = add_opcode(reg, OP_FAIL); 01195 if (r) return r; 01196 } 01197 01198 reg->options = node->option; 01199 r = compile_tree(node->target, reg); 01200 reg->options = prev; 01201 01202 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 01203 if (r) return r; 01204 r = add_opcode_option(reg, OP_SET_OPTION, prev); 01205 } 01206 return r; 01207 } 01208 01209 static int 01210 compile_length_enclose_node(EncloseNode* node, regex_t* reg) 01211 { 01212 int len; 01213 int tlen; 01214 01215 if (node->type == ENCLOSE_OPTION) 01216 return compile_length_option_node(node, reg); 01217 01218 if (node->target) { 01219 tlen = compile_length_tree(node->target, reg); 01220 if (tlen < 0) return tlen; 01221 } 01222 else 01223 tlen = 0; 01224 01225 switch (node->type) { 01226 case ENCLOSE_MEMORY: 01227 #ifdef USE_SUBEXP_CALL 01228 if (IS_ENCLOSE_CALLED(node)) { 01229 len = SIZE_OP_MEMORY_START_PUSH + tlen 01230 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; 01231 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01232 len += (IS_ENCLOSE_RECURSION(node) 01233 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); 01234 else 01235 len += (IS_ENCLOSE_RECURSION(node) 01236 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); 01237 } 01238 else 01239 #endif 01240 { 01241 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) 01242 len = SIZE_OP_MEMORY_START_PUSH; 01243 else 01244 len = SIZE_OP_MEMORY_START; 01245 01246 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) 01247 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); 01248 } 01249 break; 01250 01251 case ENCLOSE_STOP_BACKTRACK: 01252 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { 01253 QtfrNode* qn = NQTFR(node->target); 01254 tlen = compile_length_tree(qn->target, reg); 01255 if (tlen < 0) return tlen; 01256 01257 len = tlen * qn->lower 01258 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; 01259 } 01260 else { 01261 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; 01262 } 01263 break; 01264 01265 default: 01266 return ONIGERR_TYPE_BUG; 01267 break; 01268 } 01269 01270 return len; 01271 } 01272 01273 static int get_char_length_tree(Node* node, regex_t* reg, int* len); 01274 01275 static int 01276 compile_enclose_node(EncloseNode* node, regex_t* reg) 01277 { 01278 int r, len; 01279 01280 if (node->type == ENCLOSE_OPTION) 01281 return compile_option_node(node, reg); 01282 01283 switch (node->type) { 01284 case ENCLOSE_MEMORY: 01285 #ifdef USE_SUBEXP_CALL 01286 if (IS_ENCLOSE_CALLED(node)) { 01287 r = add_opcode(reg, OP_CALL); 01288 if (r) return r; 01289 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; 01290 node->state |= NST_ADDR_FIXED; 01291 r = add_abs_addr(reg, (int )node->call_addr); 01292 if (r) return r; 01293 len = compile_length_tree(node->target, reg); 01294 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); 01295 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01296 len += (IS_ENCLOSE_RECURSION(node) 01297 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); 01298 else 01299 len += (IS_ENCLOSE_RECURSION(node) 01300 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); 01301 01302 r = add_opcode_rel_addr(reg, OP_JUMP, len); 01303 if (r) return r; 01304 } 01305 #endif 01306 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) 01307 r = add_opcode(reg, OP_MEMORY_START_PUSH); 01308 else 01309 r = add_opcode(reg, OP_MEMORY_START); 01310 if (r) return r; 01311 r = add_mem_num(reg, node->regnum); 01312 if (r) return r; 01313 r = compile_tree(node->target, reg); 01314 if (r) return r; 01315 #ifdef USE_SUBEXP_CALL 01316 if (IS_ENCLOSE_CALLED(node)) { 01317 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01318 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) 01319 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); 01320 else 01321 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) 01322 ? OP_MEMORY_END_REC : OP_MEMORY_END)); 01323 01324 if (r) return r; 01325 r = add_mem_num(reg, node->regnum); 01326 if (r) return r; 01327 r = add_opcode(reg, OP_RETURN); 01328 } 01329 else 01330 #endif 01331 { 01332 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01333 r = add_opcode(reg, OP_MEMORY_END_PUSH); 01334 else 01335 r = add_opcode(reg, OP_MEMORY_END); 01336 if (r) return r; 01337 r = add_mem_num(reg, node->regnum); 01338 } 01339 break; 01340 01341 case ENCLOSE_STOP_BACKTRACK: 01342 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { 01343 QtfrNode* qn = NQTFR(node->target); 01344 r = compile_tree_n_times(qn->target, qn->lower, reg); 01345 if (r) return r; 01346 01347 len = compile_length_tree(qn->target, reg); 01348 if (len < 0) return len; 01349 01350 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); 01351 if (r) return r; 01352 r = compile_tree(qn->target, reg); 01353 if (r) return r; 01354 r = add_opcode(reg, OP_POP); 01355 if (r) return r; 01356 r = add_opcode_rel_addr(reg, OP_JUMP, 01357 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); 01358 } 01359 else { 01360 r = add_opcode(reg, OP_PUSH_STOP_BT); 01361 if (r) return r; 01362 r = compile_tree(node->target, reg); 01363 if (r) return r; 01364 r = add_opcode(reg, OP_POP_STOP_BT); 01365 } 01366 break; 01367 01368 default: 01369 return ONIGERR_TYPE_BUG; 01370 break; 01371 } 01372 01373 return r; 01374 } 01375 01376 static int 01377 compile_length_anchor_node(AnchorNode* node, regex_t* reg) 01378 { 01379 int len; 01380 int tlen = 0; 01381 01382 if (node->target) { 01383 tlen = compile_length_tree(node->target, reg); 01384 if (tlen < 0) return tlen; 01385 } 01386 01387 switch (node->type) { 01388 case ANCHOR_PREC_READ: 01389 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; 01390 break; 01391 case ANCHOR_PREC_READ_NOT: 01392 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; 01393 break; 01394 case ANCHOR_LOOK_BEHIND: 01395 len = SIZE_OP_LOOK_BEHIND + tlen; 01396 break; 01397 case ANCHOR_LOOK_BEHIND_NOT: 01398 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; 01399 break; 01400 01401 default: 01402 len = SIZE_OPCODE; 01403 break; 01404 } 01405 01406 return len; 01407 } 01408 01409 static int 01410 compile_anchor_node(AnchorNode* node, regex_t* reg) 01411 { 01412 int r, len; 01413 01414 switch (node->type) { 01415 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; 01416 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; 01417 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; 01418 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; 01419 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; 01420 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; 01421 01422 case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break; 01423 case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; 01424 #ifdef USE_WORD_BEGIN_END 01425 case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break; 01426 case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break; 01427 #endif 01428 01429 case ANCHOR_PREC_READ: 01430 r = add_opcode(reg, OP_PUSH_POS); 01431 if (r) return r; 01432 r = compile_tree(node->target, reg); 01433 if (r) return r; 01434 r = add_opcode(reg, OP_POP_POS); 01435 break; 01436 01437 case ANCHOR_PREC_READ_NOT: 01438 len = compile_length_tree(node->target, reg); 01439 if (len < 0) return len; 01440 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); 01441 if (r) return r; 01442 r = compile_tree(node->target, reg); 01443 if (r) return r; 01444 r = add_opcode(reg, OP_FAIL_POS); 01445 break; 01446 01447 case ANCHOR_LOOK_BEHIND: 01448 { 01449 int n; 01450 r = add_opcode(reg, OP_LOOK_BEHIND); 01451 if (r) return r; 01452 if (node->char_len < 0) { 01453 r = get_char_length_tree(node->target, reg, &n); 01454 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 01455 } 01456 else 01457 n = node->char_len; 01458 r = add_length(reg, n); 01459 if (r) return r; 01460 r = compile_tree(node->target, reg); 01461 } 01462 break; 01463 01464 case ANCHOR_LOOK_BEHIND_NOT: 01465 { 01466 int n; 01467 len = compile_length_tree(node->target, reg); 01468 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, 01469 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); 01470 if (r) return r; 01471 if (node->char_len < 0) { 01472 r = get_char_length_tree(node->target, reg, &n); 01473 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 01474 } 01475 else 01476 n = node->char_len; 01477 r = add_length(reg, n); 01478 if (r) return r; 01479 r = compile_tree(node->target, reg); 01480 if (r) return r; 01481 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); 01482 } 01483 break; 01484 01485 default: 01486 return ONIGERR_TYPE_BUG; 01487 break; 01488 } 01489 01490 return r; 01491 } 01492 01493 static int 01494 compile_length_tree(Node* node, regex_t* reg) 01495 { 01496 int len, type, r; 01497 01498 type = NTYPE(node); 01499 switch (type) { 01500 case NT_LIST: 01501 len = 0; 01502 do { 01503 r = compile_length_tree(NCAR(node), reg); 01504 if (r < 0) return r; 01505 len += r; 01506 } while (IS_NOT_NULL(node = NCDR(node))); 01507 r = len; 01508 break; 01509 01510 case NT_ALT: 01511 { 01512 int n; 01513 01514 n = r = 0; 01515 do { 01516 r += compile_length_tree(NCAR(node), reg); 01517 n++; 01518 } while (IS_NOT_NULL(node = NCDR(node))); 01519 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); 01520 } 01521 break; 01522 01523 case NT_STR: 01524 if (NSTRING_IS_RAW(node)) 01525 r = compile_length_string_raw_node(NSTR(node), reg); 01526 else 01527 r = compile_length_string_node(node, reg); 01528 break; 01529 01530 case NT_CCLASS: 01531 r = compile_length_cclass_node(NCCLASS(node), reg); 01532 break; 01533 01534 case NT_CTYPE: 01535 case NT_CANY: 01536 r = SIZE_OPCODE; 01537 break; 01538 01539 case NT_BREF: 01540 { 01541 BRefNode* br = NBREF(node); 01542 01543 #ifdef USE_BACKREF_WITH_LEVEL 01544 if (IS_BACKREF_NEST_LEVEL(br)) { 01545 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + 01546 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); 01547 } 01548 else 01549 #endif 01550 if (br->back_num == 1) { 01551 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) 01552 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); 01553 } 01554 else { 01555 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); 01556 } 01557 } 01558 break; 01559 01560 #ifdef USE_SUBEXP_CALL 01561 case NT_CALL: 01562 r = SIZE_OP_CALL; 01563 break; 01564 #endif 01565 01566 case NT_QTFR: 01567 r = compile_length_quantifier_node(NQTFR(node), reg); 01568 break; 01569 01570 case NT_ENCLOSE: 01571 r = compile_length_enclose_node(NENCLOSE(node), reg); 01572 break; 01573 01574 case NT_ANCHOR: 01575 r = compile_length_anchor_node(NANCHOR(node), reg); 01576 break; 01577 01578 default: 01579 return ONIGERR_TYPE_BUG; 01580 break; 01581 } 01582 01583 return r; 01584 } 01585 01586 static int 01587 compile_tree(Node* node, regex_t* reg) 01588 { 01589 int n, type, len, pos, r = 0; 01590 01591 type = NTYPE(node); 01592 switch (type) { 01593 case NT_LIST: 01594 do { 01595 r = compile_tree(NCAR(node), reg); 01596 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01597 break; 01598 01599 case NT_ALT: 01600 { 01601 Node* x = node; 01602 len = 0; 01603 do { 01604 len += compile_length_tree(NCAR(x), reg); 01605 if (NCDR(x) != NULL) { 01606 len += SIZE_OP_PUSH + SIZE_OP_JUMP; 01607 } 01608 } while (IS_NOT_NULL(x = NCDR(x))); 01609 pos = reg->used + len; /* goal position */ 01610 01611 do { 01612 len = compile_length_tree(NCAR(node), reg); 01613 if (IS_NOT_NULL(NCDR(node))) { 01614 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); 01615 if (r) break; 01616 } 01617 r = compile_tree(NCAR(node), reg); 01618 if (r) break; 01619 if (IS_NOT_NULL(NCDR(node))) { 01620 len = pos - (reg->used + SIZE_OP_JUMP); 01621 r = add_opcode_rel_addr(reg, OP_JUMP, len); 01622 if (r) break; 01623 } 01624 } while (IS_NOT_NULL(node = NCDR(node))); 01625 } 01626 break; 01627 01628 case NT_STR: 01629 if (NSTRING_IS_RAW(node)) 01630 r = compile_string_raw_node(NSTR(node), reg); 01631 else 01632 r = compile_string_node(node, reg); 01633 break; 01634 01635 case NT_CCLASS: 01636 r = compile_cclass_node(NCCLASS(node), reg); 01637 break; 01638 01639 case NT_CTYPE: 01640 { 01641 int op; 01642 01643 switch (NCTYPE(node)->ctype) { 01644 case ONIGENC_CTYPE_WORD: 01645 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; 01646 else op = OP_WORD; 01647 break; 01648 default: 01649 return ONIGERR_TYPE_BUG; 01650 break; 01651 } 01652 r = add_opcode(reg, op); 01653 } 01654 break; 01655 01656 case NT_CANY: 01657 if (IS_MULTILINE(reg->options)) 01658 r = add_opcode(reg, OP_ANYCHAR_ML); 01659 else 01660 r = add_opcode(reg, OP_ANYCHAR); 01661 break; 01662 01663 case NT_BREF: 01664 { 01665 BRefNode* br = NBREF(node); 01666 01667 #ifdef USE_BACKREF_WITH_LEVEL 01668 if (IS_BACKREF_NEST_LEVEL(br)) { 01669 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); 01670 if (r) return r; 01671 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); 01672 if (r) return r; 01673 r = add_length(reg, br->nest_level); 01674 if (r) return r; 01675 01676 goto add_bacref_mems; 01677 } 01678 else 01679 #endif 01680 if (br->back_num == 1) { 01681 n = br->back_static[0]; 01682 if (IS_IGNORECASE(reg->options)) { 01683 r = add_opcode(reg, OP_BACKREFN_IC); 01684 if (r) return r; 01685 r = add_mem_num(reg, n); 01686 } 01687 else { 01688 switch (n) { 01689 case 1: r = add_opcode(reg, OP_BACKREF1); break; 01690 case 2: r = add_opcode(reg, OP_BACKREF2); break; 01691 default: 01692 r = add_opcode(reg, OP_BACKREFN); 01693 if (r) return r; 01694 r = add_mem_num(reg, n); 01695 break; 01696 } 01697 } 01698 } 01699 else { 01700 int i; 01701 int* p; 01702 01703 if (IS_IGNORECASE(reg->options)) { 01704 r = add_opcode(reg, OP_BACKREF_MULTI_IC); 01705 } 01706 else { 01707 r = add_opcode(reg, OP_BACKREF_MULTI); 01708 } 01709 if (r) return r; 01710 01711 #ifdef USE_BACKREF_WITH_LEVEL 01712 add_bacref_mems: 01713 #endif 01714 r = add_length(reg, br->back_num); 01715 if (r) return r; 01716 p = BACKREFS_P(br); 01717 for (i = br->back_num - 1; i >= 0; i--) { 01718 r = add_mem_num(reg, p[i]); 01719 if (r) return r; 01720 } 01721 } 01722 } 01723 break; 01724 01725 #ifdef USE_SUBEXP_CALL 01726 case NT_CALL: 01727 r = compile_call(NCALL(node), reg); 01728 break; 01729 #endif 01730 01731 case NT_QTFR: 01732 r = compile_quantifier_node(NQTFR(node), reg); 01733 break; 01734 01735 case NT_ENCLOSE: 01736 r = compile_enclose_node(NENCLOSE(node), reg); 01737 break; 01738 01739 case NT_ANCHOR: 01740 r = compile_anchor_node(NANCHOR(node), reg); 01741 break; 01742 01743 default: 01744 #ifdef ONIG_DEBUG 01745 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); 01746 #endif 01747 break; 01748 } 01749 01750 return r; 01751 } 01752 01753 #ifdef USE_NAMED_GROUP 01754 01755 static int 01756 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) 01757 { 01758 int r = 0; 01759 Node* node = *plink; 01760 01761 switch (NTYPE(node)) { 01762 case NT_LIST: 01763 case NT_ALT: 01764 do { 01765 r = noname_disable_map(&(NCAR(node)), map, counter); 01766 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01767 break; 01768 01769 case NT_QTFR: 01770 { 01771 Node** ptarget = &(NQTFR(node)->target); 01772 Node* old = *ptarget; 01773 r = noname_disable_map(ptarget, map, counter); 01774 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { 01775 onig_reduce_nested_quantifier(node, *ptarget); 01776 } 01777 } 01778 break; 01779 01780 case NT_ENCLOSE: 01781 { 01782 EncloseNode* en = NENCLOSE(node); 01783 if (en->type == ENCLOSE_MEMORY) { 01784 if (IS_ENCLOSE_NAMED_GROUP(en)) { 01785 (*counter)++; 01786 map[en->regnum].new_val = *counter; 01787 en->regnum = *counter; 01788 r = noname_disable_map(&(en->target), map, counter); 01789 } 01790 else { 01791 *plink = en->target; 01792 en->target = NULL_NODE; 01793 onig_node_free(node); 01794 r = noname_disable_map(plink, map, counter); 01795 } 01796 } 01797 else 01798 r = noname_disable_map(&(en->target), map, counter); 01799 } 01800 break; 01801 01802 case NT_ANCHOR: 01803 { 01804 AnchorNode* an = NANCHOR(node); 01805 switch (an->type) { 01806 case ANCHOR_PREC_READ: 01807 case ANCHOR_PREC_READ_NOT: 01808 case ANCHOR_LOOK_BEHIND: 01809 case ANCHOR_LOOK_BEHIND_NOT: 01810 r = noname_disable_map(&(an->target), map, counter); 01811 break; 01812 } 01813 } 01814 break; 01815 01816 default: 01817 break; 01818 } 01819 01820 return r; 01821 } 01822 01823 static int 01824 renumber_node_backref(Node* node, GroupNumRemap* map) 01825 { 01826 int i, pos, n, old_num; 01827 int *backs; 01828 BRefNode* bn = NBREF(node); 01829 01830 if (! IS_BACKREF_NAME_REF(bn)) 01831 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 01832 01833 old_num = bn->back_num; 01834 if (IS_NULL(bn->back_dynamic)) 01835 backs = bn->back_static; 01836 else 01837 backs = bn->back_dynamic; 01838 01839 for (i = 0, pos = 0; i < old_num; i++) { 01840 n = map[backs[i]].new_val; 01841 if (n > 0) { 01842 backs[pos] = n; 01843 pos++; 01844 } 01845 } 01846 01847 bn->back_num = pos; 01848 return 0; 01849 } 01850 01851 static int 01852 renumber_by_map(Node* node, GroupNumRemap* map) 01853 { 01854 int r = 0; 01855 01856 switch (NTYPE(node)) { 01857 case NT_LIST: 01858 case NT_ALT: 01859 do { 01860 r = renumber_by_map(NCAR(node), map); 01861 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01862 break; 01863 case NT_QTFR: 01864 r = renumber_by_map(NQTFR(node)->target, map); 01865 break; 01866 case NT_ENCLOSE: 01867 r = renumber_by_map(NENCLOSE(node)->target, map); 01868 break; 01869 01870 case NT_BREF: 01871 r = renumber_node_backref(node, map); 01872 break; 01873 01874 case NT_ANCHOR: 01875 { 01876 AnchorNode* an = NANCHOR(node); 01877 switch (an->type) { 01878 case ANCHOR_PREC_READ: 01879 case ANCHOR_PREC_READ_NOT: 01880 case ANCHOR_LOOK_BEHIND: 01881 case ANCHOR_LOOK_BEHIND_NOT: 01882 r = renumber_by_map(an->target, map); 01883 break; 01884 } 01885 } 01886 break; 01887 01888 default: 01889 break; 01890 } 01891 01892 return r; 01893 } 01894 01895 static int 01896 numbered_ref_check(Node* node) 01897 { 01898 int r = 0; 01899 01900 switch (NTYPE(node)) { 01901 case NT_LIST: 01902 case NT_ALT: 01903 do { 01904 r = numbered_ref_check(NCAR(node)); 01905 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01906 break; 01907 case NT_QTFR: 01908 r = numbered_ref_check(NQTFR(node)->target); 01909 break; 01910 case NT_ENCLOSE: 01911 r = numbered_ref_check(NENCLOSE(node)->target); 01912 break; 01913 01914 case NT_BREF: 01915 if (! IS_BACKREF_NAME_REF(NBREF(node))) 01916 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 01917 break; 01918 01919 default: 01920 break; 01921 } 01922 01923 return r; 01924 } 01925 01926 static int 01927 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) 01928 { 01929 int r, i, pos, counter; 01930 BitStatusType loc; 01931 GroupNumRemap* map; 01932 01933 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); 01934 CHECK_NULL_RETURN_MEMERR(map); 01935 for (i = 1; i <= env->num_mem; i++) { 01936 map[i].new_val = 0; 01937 } 01938 counter = 0; 01939 r = noname_disable_map(root, map, &counter); 01940 if (r != 0) return r; 01941 01942 r = renumber_by_map(*root, map); 01943 if (r != 0) return r; 01944 01945 for (i = 1, pos = 1; i <= env->num_mem; i++) { 01946 if (map[i].new_val > 0) { 01947 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; 01948 pos++; 01949 } 01950 } 01951 01952 loc = env->capture_history; 01953 BIT_STATUS_CLEAR(env->capture_history); 01954 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { 01955 if (BIT_STATUS_AT(loc, i)) { 01956 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); 01957 } 01958 } 01959 01960 env->num_mem = env->num_named; 01961 reg->num_mem = env->num_named; 01962 01963 return onig_renumber_name_table(reg, map); 01964 } 01965 #endif /* USE_NAMED_GROUP */ 01966 01967 #ifdef USE_SUBEXP_CALL 01968 static int 01969 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) 01970 { 01971 int i, offset; 01972 EncloseNode* en; 01973 AbsAddrType addr; 01974 01975 for (i = 0; i < uslist->num; i++) { 01976 en = NENCLOSE(uslist->us[i].target); 01977 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; 01978 addr = en->call_addr; 01979 offset = uslist->us[i].offset; 01980 01981 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); 01982 } 01983 return 0; 01984 } 01985 #endif 01986 01987 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 01988 static int 01989 quantifiers_memory_node_info(Node* node) 01990 { 01991 int r = 0; 01992 01993 switch (NTYPE(node)) { 01994 case NT_LIST: 01995 case NT_ALT: 01996 { 01997 int v; 01998 do { 01999 v = quantifiers_memory_node_info(NCAR(node)); 02000 if (v > r) r = v; 02001 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); 02002 } 02003 break; 02004 02005 #ifdef USE_SUBEXP_CALL 02006 case NT_CALL: 02007 if (IS_CALL_RECURSION(NCALL(node))) { 02008 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ 02009 } 02010 else 02011 r = quantifiers_memory_node_info(NCALL(node)->target); 02012 break; 02013 #endif 02014 02015 case NT_QTFR: 02016 { 02017 QtfrNode* qn = NQTFR(node); 02018 if (qn->upper != 0) { 02019 r = quantifiers_memory_node_info(qn->target); 02020 } 02021 } 02022 break; 02023 02024 case NT_ENCLOSE: 02025 { 02026 EncloseNode* en = NENCLOSE(node); 02027 switch (en->type) { 02028 case ENCLOSE_MEMORY: 02029 return NQ_TARGET_IS_EMPTY_MEM; 02030 break; 02031 02032 case ENCLOSE_OPTION: 02033 case ENCLOSE_STOP_BACKTRACK: 02034 r = quantifiers_memory_node_info(en->target); 02035 break; 02036 default: 02037 break; 02038 } 02039 } 02040 break; 02041 02042 case NT_BREF: 02043 case NT_STR: 02044 case NT_CTYPE: 02045 case NT_CCLASS: 02046 case NT_CANY: 02047 case NT_ANCHOR: 02048 default: 02049 break; 02050 } 02051 02052 return r; 02053 } 02054 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ 02055 02056 static int 02057 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) 02058 { 02059 OnigDistance tmin; 02060 int r = 0; 02061 02062 *min = 0; 02063 switch (NTYPE(node)) { 02064 case NT_BREF: 02065 { 02066 int i; 02067 int* backs; 02068 Node** nodes = SCANENV_MEM_NODES(env); 02069 BRefNode* br = NBREF(node); 02070 if (br->state & NST_RECURSION) break; 02071 02072 backs = BACKREFS_P(br); 02073 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; 02074 r = get_min_match_length(nodes[backs[0]], min, env); 02075 if (r != 0) break; 02076 for (i = 1; i < br->back_num; i++) { 02077 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 02078 r = get_min_match_length(nodes[backs[i]], &tmin, env); 02079 if (r != 0) break; 02080 if (*min > tmin) *min = tmin; 02081 } 02082 } 02083 break; 02084 02085 #ifdef USE_SUBEXP_CALL 02086 case NT_CALL: 02087 if (IS_CALL_RECURSION(NCALL(node))) { 02088 EncloseNode* en = NENCLOSE(NCALL(node)->target); 02089 if (IS_ENCLOSE_MIN_FIXED(en)) 02090 *min = en->min_len; 02091 } 02092 else 02093 r = get_min_match_length(NCALL(node)->target, min, env); 02094 break; 02095 #endif 02096 02097 case NT_LIST: 02098 do { 02099 r = get_min_match_length(NCAR(node), &tmin, env); 02100 if (r == 0) *min += tmin; 02101 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02102 break; 02103 02104 case NT_ALT: 02105 { 02106 Node *x, *y; 02107 y = node; 02108 do { 02109 x = NCAR(y); 02110 r = get_min_match_length(x, &tmin, env); 02111 if (r != 0) break; 02112 if (y == node) *min = tmin; 02113 else if (*min > tmin) *min = tmin; 02114 } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); 02115 } 02116 break; 02117 02118 case NT_STR: 02119 { 02120 StrNode* sn = NSTR(node); 02121 *min = sn->end - sn->s; 02122 } 02123 break; 02124 02125 case NT_CTYPE: 02126 *min = 1; 02127 break; 02128 02129 case NT_CCLASS: 02130 case NT_CANY: 02131 *min = 1; 02132 break; 02133 02134 case NT_QTFR: 02135 { 02136 QtfrNode* qn = NQTFR(node); 02137 02138 if (qn->lower > 0) { 02139 r = get_min_match_length(qn->target, min, env); 02140 if (r == 0) 02141 *min = distance_multiply(*min, qn->lower); 02142 } 02143 } 02144 break; 02145 02146 case NT_ENCLOSE: 02147 { 02148 EncloseNode* en = NENCLOSE(node); 02149 switch (en->type) { 02150 case ENCLOSE_MEMORY: 02151 #ifdef USE_SUBEXP_CALL 02152 if (IS_ENCLOSE_MIN_FIXED(en)) 02153 *min = en->min_len; 02154 else { 02155 r = get_min_match_length(en->target, min, env); 02156 if (r == 0) { 02157 en->min_len = *min; 02158 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); 02159 } 02160 } 02161 break; 02162 #endif 02163 case ENCLOSE_OPTION: 02164 case ENCLOSE_STOP_BACKTRACK: 02165 r = get_min_match_length(en->target, min, env); 02166 break; 02167 } 02168 } 02169 break; 02170 02171 case NT_ANCHOR: 02172 default: 02173 break; 02174 } 02175 02176 return r; 02177 } 02178 02179 static int 02180 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) 02181 { 02182 OnigDistance tmax; 02183 int r = 0; 02184 02185 *max = 0; 02186 switch (NTYPE(node)) { 02187 case NT_LIST: 02188 do { 02189 r = get_max_match_length(NCAR(node), &tmax, env); 02190 if (r == 0) 02191 *max = distance_add(*max, tmax); 02192 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02193 break; 02194 02195 case NT_ALT: 02196 do { 02197 r = get_max_match_length(NCAR(node), &tmax, env); 02198 if (r == 0 && *max < tmax) *max = tmax; 02199 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02200 break; 02201 02202 case NT_STR: 02203 { 02204 StrNode* sn = NSTR(node); 02205 *max = sn->end - sn->s; 02206 } 02207 break; 02208 02209 case NT_CTYPE: 02210 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 02211 break; 02212 02213 case NT_CCLASS: 02214 case NT_CANY: 02215 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 02216 break; 02217 02218 case NT_BREF: 02219 { 02220 int i; 02221 int* backs; 02222 Node** nodes = SCANENV_MEM_NODES(env); 02223 BRefNode* br = NBREF(node); 02224 if (br->state & NST_RECURSION) { 02225 *max = ONIG_INFINITE_DISTANCE; 02226 break; 02227 } 02228 backs = BACKREFS_P(br); 02229 for (i = 0; i < br->back_num; i++) { 02230 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 02231 r = get_max_match_length(nodes[backs[i]], &tmax, env); 02232 if (r != 0) break; 02233 if (*max < tmax) *max = tmax; 02234 } 02235 } 02236 break; 02237 02238 #ifdef USE_SUBEXP_CALL 02239 case NT_CALL: 02240 if (! IS_CALL_RECURSION(NCALL(node))) 02241 r = get_max_match_length(NCALL(node)->target, max, env); 02242 else 02243 *max = ONIG_INFINITE_DISTANCE; 02244 break; 02245 #endif 02246 02247 case NT_QTFR: 02248 { 02249 QtfrNode* qn = NQTFR(node); 02250 02251 if (qn->upper != 0) { 02252 r = get_max_match_length(qn->target, max, env); 02253 if (r == 0 && *max != 0) { 02254 if (! IS_REPEAT_INFINITE(qn->upper)) 02255 *max = distance_multiply(*max, qn->upper); 02256 else 02257 *max = ONIG_INFINITE_DISTANCE; 02258 } 02259 } 02260 } 02261 break; 02262 02263 case NT_ENCLOSE: 02264 { 02265 EncloseNode* en = NENCLOSE(node); 02266 switch (en->type) { 02267 case ENCLOSE_MEMORY: 02268 #ifdef USE_SUBEXP_CALL 02269 if (IS_ENCLOSE_MAX_FIXED(en)) 02270 *max = en->max_len; 02271 else { 02272 r = get_max_match_length(en->target, max, env); 02273 if (r == 0) { 02274 en->max_len = *max; 02275 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); 02276 } 02277 } 02278 break; 02279 #endif 02280 case ENCLOSE_OPTION: 02281 case ENCLOSE_STOP_BACKTRACK: 02282 r = get_max_match_length(en->target, max, env); 02283 break; 02284 } 02285 } 02286 break; 02287 02288 case NT_ANCHOR: 02289 default: 02290 break; 02291 } 02292 02293 return r; 02294 } 02295 02296 #define GET_CHAR_LEN_VARLEN -1 02297 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2 02298 02299 /* fixed size pattern node only */ 02300 static int 02301 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) 02302 { 02303 int tlen; 02304 int r = 0; 02305 02306 level++; 02307 *len = 0; 02308 switch (NTYPE(node)) { 02309 case NT_LIST: 02310 do { 02311 r = get_char_length_tree1(NCAR(node), reg, &tlen, level); 02312 if (r == 0) 02313 *len = (int)distance_add(*len, tlen); 02314 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02315 break; 02316 02317 case NT_ALT: 02318 { 02319 int tlen2; 02320 int varlen = 0; 02321 02322 r = get_char_length_tree1(NCAR(node), reg, &tlen, level); 02323 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { 02324 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); 02325 if (r == 0) { 02326 if (tlen != tlen2) 02327 varlen = 1; 02328 } 02329 } 02330 if (r == 0) { 02331 if (varlen != 0) { 02332 if (level == 1) 02333 r = GET_CHAR_LEN_TOP_ALT_VARLEN; 02334 else 02335 r = GET_CHAR_LEN_VARLEN; 02336 } 02337 else 02338 *len = tlen; 02339 } 02340 } 02341 break; 02342 02343 case NT_STR: 02344 { 02345 StrNode* sn = NSTR(node); 02346 UChar *s = sn->s; 02347 while (s < sn->end) { 02348 s += enclen(reg->enc, s, sn->end); 02349 (*len)++; 02350 } 02351 } 02352 break; 02353 02354 case NT_QTFR: 02355 { 02356 QtfrNode* qn = NQTFR(node); 02357 if (qn->lower == qn->upper) { 02358 r = get_char_length_tree1(qn->target, reg, &tlen, level); 02359 if (r == 0) 02360 *len = (int)distance_multiply(tlen, qn->lower); 02361 } 02362 else 02363 r = GET_CHAR_LEN_VARLEN; 02364 } 02365 break; 02366 02367 #ifdef USE_SUBEXP_CALL 02368 case NT_CALL: 02369 if (! IS_CALL_RECURSION(NCALL(node))) 02370 r = get_char_length_tree1(NCALL(node)->target, reg, len, level); 02371 else 02372 r = GET_CHAR_LEN_VARLEN; 02373 break; 02374 #endif 02375 02376 case NT_CTYPE: 02377 *len = 1; 02378 break; 02379 02380 case NT_CCLASS: 02381 case NT_CANY: 02382 *len = 1; 02383 break; 02384 02385 case NT_ENCLOSE: 02386 { 02387 EncloseNode* en = NENCLOSE(node); 02388 switch (en->type) { 02389 case ENCLOSE_MEMORY: 02390 #ifdef USE_SUBEXP_CALL 02391 if (IS_ENCLOSE_CLEN_FIXED(en)) 02392 *len = en->char_len; 02393 else { 02394 r = get_char_length_tree1(en->target, reg, len, level); 02395 if (r == 0) { 02396 en->char_len = *len; 02397 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); 02398 } 02399 } 02400 break; 02401 #endif 02402 case ENCLOSE_OPTION: 02403 case ENCLOSE_STOP_BACKTRACK: 02404 r = get_char_length_tree1(en->target, reg, len, level); 02405 break; 02406 default: 02407 break; 02408 } 02409 } 02410 break; 02411 02412 case NT_ANCHOR: 02413 break; 02414 02415 default: 02416 r = GET_CHAR_LEN_VARLEN; 02417 break; 02418 } 02419 02420 return r; 02421 } 02422 02423 static int 02424 get_char_length_tree(Node* node, regex_t* reg, int* len) 02425 { 02426 return get_char_length_tree1(node, reg, len, 0); 02427 } 02428 02429 /* x is not included y ==> 1 : 0 */ 02430 static int 02431 is_not_included(Node* x, Node* y, regex_t* reg) 02432 { 02433 int i; 02434 OnigDistance len; 02435 OnigCodePoint code; 02436 UChar *p, c; 02437 int ytype; 02438 02439 retry: 02440 ytype = NTYPE(y); 02441 switch (NTYPE(x)) { 02442 case NT_CTYPE: 02443 { 02444 switch (ytype) { 02445 case NT_CTYPE: 02446 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && 02447 NCTYPE(y)->not != NCTYPE(x)->not) 02448 return 1; 02449 else 02450 return 0; 02451 break; 02452 02453 case NT_CCLASS: 02454 swap: 02455 { 02456 Node* tmp; 02457 tmp = x; x = y; y = tmp; 02458 goto retry; 02459 } 02460 break; 02461 02462 case NT_STR: 02463 goto swap; 02464 break; 02465 02466 default: 02467 break; 02468 } 02469 } 02470 break; 02471 02472 case NT_CCLASS: 02473 { 02474 CClassNode* xc = NCCLASS(x); 02475 switch (ytype) { 02476 case NT_CTYPE: 02477 switch (NCTYPE(y)->ctype) { 02478 case ONIGENC_CTYPE_WORD: 02479 if (NCTYPE(y)->not == 0) { 02480 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { 02481 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 02482 if (BITSET_AT(xc->bs, i)) { 02483 if (IS_CODE_SB_WORD(reg->enc, i)) return 0; 02484 } 02485 } 02486 return 1; 02487 } 02488 return 0; 02489 } 02490 else { 02491 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 02492 if (! IS_CODE_SB_WORD(reg->enc, i)) { 02493 if (!IS_NCCLASS_NOT(xc)) { 02494 if (BITSET_AT(xc->bs, i)) 02495 return 0; 02496 } 02497 else { 02498 if (! BITSET_AT(xc->bs, i)) 02499 return 0; 02500 } 02501 } 02502 } 02503 return 1; 02504 } 02505 break; 02506 02507 default: 02508 break; 02509 } 02510 break; 02511 02512 case NT_CCLASS: 02513 { 02514 int v; 02515 CClassNode* yc = NCCLASS(y); 02516 02517 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 02518 v = BITSET_AT(xc->bs, i); 02519 if ((v != 0 && !IS_NCCLASS_NOT(xc)) || 02520 (v == 0 && IS_NCCLASS_NOT(xc))) { 02521 v = BITSET_AT(yc->bs, i); 02522 if ((v != 0 && !IS_NCCLASS_NOT(yc)) || 02523 (v == 0 && IS_NCCLASS_NOT(yc))) 02524 return 0; 02525 } 02526 } 02527 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || 02528 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) 02529 return 1; 02530 return 0; 02531 } 02532 break; 02533 02534 case NT_STR: 02535 goto swap; 02536 break; 02537 02538 default: 02539 break; 02540 } 02541 } 02542 break; 02543 02544 case NT_STR: 02545 { 02546 StrNode* xs = NSTR(x); 02547 if (NSTRING_LEN(x) == 0) 02548 break; 02549 02550 c = *(xs->s); 02551 switch (ytype) { 02552 case NT_CTYPE: 02553 switch (NCTYPE(y)->ctype) { 02554 case ONIGENC_CTYPE_WORD: 02555 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) 02556 return NCTYPE(y)->not; 02557 else 02558 return !(NCTYPE(y)->not); 02559 break; 02560 default: 02561 break; 02562 } 02563 break; 02564 02565 case NT_CCLASS: 02566 { 02567 CClassNode* cc = NCCLASS(y); 02568 02569 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, 02570 xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); 02571 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1); 02572 } 02573 break; 02574 02575 case NT_STR: 02576 { 02577 UChar *q; 02578 StrNode* ys = NSTR(y); 02579 len = NSTRING_LEN(x); 02580 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); 02581 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { 02582 /* tiny version */ 02583 return 0; 02584 } 02585 else { 02586 for (i = 0, p = ys->s, q = xs->s; (OnigDistance)i < len; i++, p++, q++) { 02587 if (*p != *q) return 1; 02588 } 02589 } 02590 } 02591 break; 02592 02593 default: 02594 break; 02595 } 02596 } 02597 break; 02598 02599 default: 02600 break; 02601 } 02602 02603 return 0; 02604 } 02605 02606 static Node* 02607 get_head_value_node(Node* node, int exact, regex_t* reg) 02608 { 02609 Node* n = NULL_NODE; 02610 02611 switch (NTYPE(node)) { 02612 case NT_BREF: 02613 case NT_ALT: 02614 case NT_CANY: 02615 #ifdef USE_SUBEXP_CALL 02616 case NT_CALL: 02617 #endif 02618 break; 02619 02620 case NT_CTYPE: 02621 case NT_CCLASS: 02622 if (exact == 0) { 02623 n = node; 02624 } 02625 break; 02626 02627 case NT_LIST: 02628 n = get_head_value_node(NCAR(node), exact, reg); 02629 break; 02630 02631 case NT_STR: 02632 { 02633 StrNode* sn = NSTR(node); 02634 02635 if (sn->end <= sn->s) 02636 break; 02637 02638 if (exact != 0 && 02639 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { 02640 } 02641 else { 02642 n = node; 02643 } 02644 } 02645 break; 02646 02647 case NT_QTFR: 02648 { 02649 QtfrNode* qn = NQTFR(node); 02650 if (qn->lower > 0) { 02651 if (IS_NOT_NULL(qn->head_exact)) 02652 n = qn->head_exact; 02653 else 02654 n = get_head_value_node(qn->target, exact, reg); 02655 } 02656 } 02657 break; 02658 02659 case NT_ENCLOSE: 02660 { 02661 EncloseNode* en = NENCLOSE(node); 02662 switch (en->type) { 02663 case ENCLOSE_OPTION: 02664 { 02665 OnigOptionType options = reg->options; 02666 02667 reg->options = NENCLOSE(node)->option; 02668 n = get_head_value_node(NENCLOSE(node)->target, exact, reg); 02669 reg->options = options; 02670 } 02671 break; 02672 02673 case ENCLOSE_MEMORY: 02674 case ENCLOSE_STOP_BACKTRACK: 02675 n = get_head_value_node(en->target, exact, reg); 02676 break; 02677 } 02678 } 02679 break; 02680 02681 case NT_ANCHOR: 02682 if (NANCHOR(node)->type == ANCHOR_PREC_READ) 02683 n = get_head_value_node(NANCHOR(node)->target, exact, reg); 02684 break; 02685 02686 default: 02687 break; 02688 } 02689 02690 return n; 02691 } 02692 02693 static int 02694 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) 02695 { 02696 int type, r = 0; 02697 02698 type = NTYPE(node); 02699 if ((NTYPE2BIT(type) & type_mask) == 0) 02700 return 1; 02701 02702 switch (type) { 02703 case NT_LIST: 02704 case NT_ALT: 02705 do { 02706 r = check_type_tree(NCAR(node), type_mask, enclose_mask, 02707 anchor_mask); 02708 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02709 break; 02710 02711 case NT_QTFR: 02712 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, 02713 anchor_mask); 02714 break; 02715 02716 case NT_ENCLOSE: 02717 { 02718 EncloseNode* en = NENCLOSE(node); 02719 if ((en->type & enclose_mask) == 0) 02720 return 1; 02721 02722 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); 02723 } 02724 break; 02725 02726 case NT_ANCHOR: 02727 type = NANCHOR(node)->type; 02728 if ((type & anchor_mask) == 0) 02729 return 1; 02730 02731 if (NANCHOR(node)->target) 02732 r = check_type_tree(NANCHOR(node)->target, 02733 type_mask, enclose_mask, anchor_mask); 02734 break; 02735 02736 default: 02737 break; 02738 } 02739 return r; 02740 } 02741 02742 #ifdef USE_SUBEXP_CALL 02743 02744 #define RECURSION_EXIST 1 02745 #define RECURSION_INFINITE 2 02746 02747 static int 02748 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) 02749 { 02750 int type; 02751 int r = 0; 02752 02753 type = NTYPE(node); 02754 switch (type) { 02755 case NT_LIST: 02756 { 02757 Node *x; 02758 OnigDistance min; 02759 int ret; 02760 02761 x = node; 02762 do { 02763 ret = subexp_inf_recursive_check(NCAR(x), env, head); 02764 if (ret < 0 || ret == RECURSION_INFINITE) return ret; 02765 r |= ret; 02766 if (head) { 02767 ret = get_min_match_length(NCAR(x), &min, env); 02768 if (ret != 0) return ret; 02769 if (min != 0) head = 0; 02770 } 02771 } while (IS_NOT_NULL(x = NCDR(x))); 02772 } 02773 break; 02774 02775 case NT_ALT: 02776 { 02777 int ret; 02778 r = RECURSION_EXIST; 02779 do { 02780 ret = subexp_inf_recursive_check(NCAR(node), env, head); 02781 if (ret < 0 || ret == RECURSION_INFINITE) return ret; 02782 r &= ret; 02783 } while (IS_NOT_NULL(node = NCDR(node))); 02784 } 02785 break; 02786 02787 case NT_QTFR: 02788 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); 02789 if (r == RECURSION_EXIST) { 02790 if (NQTFR(node)->lower == 0) r = 0; 02791 } 02792 break; 02793 02794 case NT_ANCHOR: 02795 { 02796 AnchorNode* an = NANCHOR(node); 02797 switch (an->type) { 02798 case ANCHOR_PREC_READ: 02799 case ANCHOR_PREC_READ_NOT: 02800 case ANCHOR_LOOK_BEHIND: 02801 case ANCHOR_LOOK_BEHIND_NOT: 02802 r = subexp_inf_recursive_check(an->target, env, head); 02803 break; 02804 } 02805 } 02806 break; 02807 02808 case NT_CALL: 02809 r = subexp_inf_recursive_check(NCALL(node)->target, env, head); 02810 break; 02811 02812 case NT_ENCLOSE: 02813 if (IS_ENCLOSE_MARK2(NENCLOSE(node))) 02814 return 0; 02815 else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) 02816 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); 02817 else { 02818 SET_ENCLOSE_STATUS(node, NST_MARK2); 02819 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); 02820 CLEAR_ENCLOSE_STATUS(node, NST_MARK2); 02821 } 02822 break; 02823 02824 default: 02825 break; 02826 } 02827 02828 return r; 02829 } 02830 02831 static int 02832 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) 02833 { 02834 int type; 02835 int r = 0; 02836 02837 type = NTYPE(node); 02838 switch (type) { 02839 case NT_LIST: 02840 case NT_ALT: 02841 do { 02842 r = subexp_inf_recursive_check_trav(NCAR(node), env); 02843 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02844 break; 02845 02846 case NT_QTFR: 02847 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); 02848 break; 02849 02850 case NT_ANCHOR: 02851 { 02852 AnchorNode* an = NANCHOR(node); 02853 switch (an->type) { 02854 case ANCHOR_PREC_READ: 02855 case ANCHOR_PREC_READ_NOT: 02856 case ANCHOR_LOOK_BEHIND: 02857 case ANCHOR_LOOK_BEHIND_NOT: 02858 r = subexp_inf_recursive_check_trav(an->target, env); 02859 break; 02860 } 02861 } 02862 break; 02863 02864 case NT_ENCLOSE: 02865 { 02866 EncloseNode* en = NENCLOSE(node); 02867 02868 if (IS_ENCLOSE_RECURSION(en)) { 02869 SET_ENCLOSE_STATUS(node, NST_MARK1); 02870 r = subexp_inf_recursive_check(en->target, env, 1); 02871 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; 02872 CLEAR_ENCLOSE_STATUS(node, NST_MARK1); 02873 } 02874 r = subexp_inf_recursive_check_trav(en->target, env); 02875 } 02876 02877 break; 02878 02879 default: 02880 break; 02881 } 02882 02883 return r; 02884 } 02885 02886 static int 02887 subexp_recursive_check(Node* node) 02888 { 02889 int r = 0; 02890 02891 switch (NTYPE(node)) { 02892 case NT_LIST: 02893 case NT_ALT: 02894 do { 02895 r |= subexp_recursive_check(NCAR(node)); 02896 } while (IS_NOT_NULL(node = NCDR(node))); 02897 break; 02898 02899 case NT_QTFR: 02900 r = subexp_recursive_check(NQTFR(node)->target); 02901 break; 02902 02903 case NT_ANCHOR: 02904 { 02905 AnchorNode* an = NANCHOR(node); 02906 switch (an->type) { 02907 case ANCHOR_PREC_READ: 02908 case ANCHOR_PREC_READ_NOT: 02909 case ANCHOR_LOOK_BEHIND: 02910 case ANCHOR_LOOK_BEHIND_NOT: 02911 r = subexp_recursive_check(an->target); 02912 break; 02913 } 02914 } 02915 break; 02916 02917 case NT_CALL: 02918 r = subexp_recursive_check(NCALL(node)->target); 02919 if (r != 0) SET_CALL_RECURSION(node); 02920 break; 02921 02922 case NT_ENCLOSE: 02923 if (IS_ENCLOSE_MARK2(NENCLOSE(node))) 02924 return 0; 02925 else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) 02926 return 1; /* recursion */ 02927 else { 02928 SET_ENCLOSE_STATUS(node, NST_MARK2); 02929 r = subexp_recursive_check(NENCLOSE(node)->target); 02930 CLEAR_ENCLOSE_STATUS(node, NST_MARK2); 02931 } 02932 break; 02933 02934 default: 02935 break; 02936 } 02937 02938 return r; 02939 } 02940 02941 02942 static int 02943 subexp_recursive_check_trav(Node* node, ScanEnv* env) 02944 { 02945 #define FOUND_CALLED_NODE 1 02946 02947 int type; 02948 int r = 0; 02949 02950 type = NTYPE(node); 02951 switch (type) { 02952 case NT_LIST: 02953 case NT_ALT: 02954 { 02955 int ret; 02956 do { 02957 ret = subexp_recursive_check_trav(NCAR(node), env); 02958 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; 02959 else if (ret < 0) return ret; 02960 } while (IS_NOT_NULL(node = NCDR(node))); 02961 } 02962 break; 02963 02964 case NT_QTFR: 02965 r = subexp_recursive_check_trav(NQTFR(node)->target, env); 02966 if (NQTFR(node)->upper == 0) { 02967 if (r == FOUND_CALLED_NODE) 02968 NQTFR(node)->is_refered = 1; 02969 } 02970 break; 02971 02972 case NT_ANCHOR: 02973 { 02974 AnchorNode* an = NANCHOR(node); 02975 switch (an->type) { 02976 case ANCHOR_PREC_READ: 02977 case ANCHOR_PREC_READ_NOT: 02978 case ANCHOR_LOOK_BEHIND: 02979 case ANCHOR_LOOK_BEHIND_NOT: 02980 r = subexp_recursive_check_trav(an->target, env); 02981 break; 02982 } 02983 } 02984 break; 02985 02986 case NT_ENCLOSE: 02987 { 02988 EncloseNode* en = NENCLOSE(node); 02989 02990 if (! IS_ENCLOSE_RECURSION(en)) { 02991 if (IS_ENCLOSE_CALLED(en)) { 02992 SET_ENCLOSE_STATUS(node, NST_MARK1); 02993 r = subexp_recursive_check(en->target); 02994 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); 02995 CLEAR_ENCLOSE_STATUS(node, NST_MARK1); 02996 } 02997 } 02998 r = subexp_recursive_check_trav(en->target, env); 02999 if (IS_ENCLOSE_CALLED(en)) 03000 r |= FOUND_CALLED_NODE; 03001 } 03002 break; 03003 03004 default: 03005 break; 03006 } 03007 03008 return r; 03009 } 03010 03011 static int 03012 setup_subexp_call(Node* node, ScanEnv* env) 03013 { 03014 int type; 03015 int r = 0; 03016 03017 type = NTYPE(node); 03018 switch (type) { 03019 case NT_LIST: 03020 do { 03021 r = setup_subexp_call(NCAR(node), env); 03022 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03023 break; 03024 03025 case NT_ALT: 03026 do { 03027 r = setup_subexp_call(NCAR(node), env); 03028 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03029 break; 03030 03031 case NT_QTFR: 03032 r = setup_subexp_call(NQTFR(node)->target, env); 03033 break; 03034 case NT_ENCLOSE: 03035 r = setup_subexp_call(NENCLOSE(node)->target, env); 03036 break; 03037 03038 case NT_CALL: 03039 { 03040 CallNode* cn = NCALL(node); 03041 Node** nodes = SCANENV_MEM_NODES(env); 03042 03043 if (cn->group_num != 0) { 03044 int gnum = cn->group_num; 03045 03046 #ifdef USE_NAMED_GROUP 03047 if (env->num_named > 0 && 03048 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && 03049 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { 03050 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 03051 } 03052 #endif 03053 if (gnum > env->num_mem) { 03054 onig_scan_env_set_error_string(env, 03055 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); 03056 return ONIGERR_UNDEFINED_GROUP_REFERENCE; 03057 } 03058 03059 #ifdef USE_NAMED_GROUP 03060 set_call_attr: 03061 #endif 03062 cn->target = nodes[cn->group_num]; 03063 if (IS_NULL(cn->target)) { 03064 onig_scan_env_set_error_string(env, 03065 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); 03066 return ONIGERR_UNDEFINED_NAME_REFERENCE; 03067 } 03068 SET_ENCLOSE_STATUS(cn->target, NST_CALLED); 03069 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); 03070 cn->unset_addr_list = env->unset_addr_list; 03071 } 03072 #ifdef USE_NAMED_GROUP 03073 else { 03074 int *refs; 03075 03076 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, 03077 &refs); 03078 if (n <= 0) { 03079 onig_scan_env_set_error_string(env, 03080 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); 03081 return ONIGERR_UNDEFINED_NAME_REFERENCE; 03082 } 03083 else if (n > 1) { 03084 onig_scan_env_set_error_string(env, 03085 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); 03086 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; 03087 } 03088 else { 03089 cn->group_num = refs[0]; 03090 goto set_call_attr; 03091 } 03092 } 03093 #endif 03094 } 03095 break; 03096 03097 case NT_ANCHOR: 03098 { 03099 AnchorNode* an = NANCHOR(node); 03100 03101 switch (an->type) { 03102 case ANCHOR_PREC_READ: 03103 case ANCHOR_PREC_READ_NOT: 03104 case ANCHOR_LOOK_BEHIND: 03105 case ANCHOR_LOOK_BEHIND_NOT: 03106 r = setup_subexp_call(an->target, env); 03107 break; 03108 } 03109 } 03110 break; 03111 03112 default: 03113 break; 03114 } 03115 03116 return r; 03117 } 03118 #endif 03119 03120 /* divide different length alternatives in look-behind. 03121 (?<=A|B) ==> (?<=A)|(?<=B) 03122 (?<!A|B) ==> (?<!A)(?<!B) 03123 */ 03124 static int 03125 divide_look_behind_alternatives(Node* node) 03126 { 03127 Node *head, *np, *insert_node; 03128 AnchorNode* an = NANCHOR(node); 03129 int anc_type = an->type; 03130 03131 head = an->target; 03132 np = NCAR(head); 03133 swap_node(node, head); 03134 NCAR(node) = head; 03135 NANCHOR(head)->target = np; 03136 03137 np = node; 03138 while ((np = NCDR(np)) != NULL_NODE) { 03139 insert_node = onig_node_new_anchor(anc_type); 03140 CHECK_NULL_RETURN_MEMERR(insert_node); 03141 NANCHOR(insert_node)->target = NCAR(np); 03142 NCAR(np) = insert_node; 03143 } 03144 03145 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { 03146 np = node; 03147 do { 03148 SET_NTYPE(np, NT_LIST); /* alt -> list */ 03149 } while ((np = NCDR(np)) != NULL_NODE); 03150 } 03151 return 0; 03152 } 03153 03154 static int 03155 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) 03156 { 03157 int r, len; 03158 AnchorNode* an = NANCHOR(node); 03159 03160 r = get_char_length_tree(an->target, reg, &len); 03161 if (r == 0) 03162 an->char_len = len; 03163 else if (r == GET_CHAR_LEN_VARLEN) 03164 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 03165 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { 03166 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) 03167 r = divide_look_behind_alternatives(node); 03168 else 03169 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 03170 } 03171 03172 return r; 03173 } 03174 03175 static int 03176 next_setup(Node* node, Node* next_node, regex_t* reg) 03177 { 03178 int type; 03179 03180 retry: 03181 type = NTYPE(node); 03182 if (type == NT_QTFR) { 03183 QtfrNode* qn = NQTFR(node); 03184 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { 03185 #ifdef USE_QTFR_PEEK_NEXT 03186 Node* n = get_head_value_node(next_node, 1, reg); 03187 /* '\0': for UTF-16BE etc... */ 03188 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { 03189 qn->next_head_exact = n; 03190 } 03191 #endif 03192 /* automatic posseivation a*b ==> (?>a*)b */ 03193 if (qn->lower <= 1) { 03194 int ttype = NTYPE(qn->target); 03195 if (IS_NODE_TYPE_SIMPLE(ttype)) { 03196 Node *x, *y; 03197 x = get_head_value_node(qn->target, 0, reg); 03198 if (IS_NOT_NULL(x)) { 03199 y = get_head_value_node(next_node, 0, reg); 03200 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { 03201 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); 03202 CHECK_NULL_RETURN_MEMERR(en); 03203 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); 03204 swap_node(node, en); 03205 NENCLOSE(node)->target = en; 03206 } 03207 } 03208 } 03209 } 03210 } 03211 } 03212 else if (type == NT_ENCLOSE) { 03213 EncloseNode* en = NENCLOSE(node); 03214 if (en->type == ENCLOSE_MEMORY) { 03215 node = en->target; 03216 goto retry; 03217 } 03218 } 03219 return 0; 03220 } 03221 03222 03223 static int 03224 update_string_node_case_fold(regex_t* reg, Node *node) 03225 { 03226 UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 03227 UChar *sbuf, *ebuf, *sp; 03228 int r, i, len; 03229 OnigDistance sbuf_size; 03230 StrNode* sn = NSTR(node); 03231 03232 end = sn->end; 03233 sbuf_size = (end - sn->s) * 2; 03234 sbuf = (UChar* )xmalloc(sbuf_size); 03235 CHECK_NULL_RETURN_MEMERR(sbuf); 03236 ebuf = sbuf + sbuf_size; 03237 03238 sp = sbuf; 03239 p = sn->s; 03240 while (p < end) { 03241 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); 03242 q = buf; 03243 for (i = 0; i < len; i++) { 03244 if (sp >= ebuf) { 03245 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); 03246 CHECK_NULL_RETURN_MEMERR(sbuf); 03247 sp = sbuf + sbuf_size; 03248 sbuf_size *= 2; 03249 ebuf = sbuf + sbuf_size; 03250 } 03251 03252 *sp++ = buf[i]; 03253 } 03254 } 03255 03256 r = onig_node_str_set(node, sbuf, sp); 03257 if (r != 0) { 03258 xfree(sbuf); 03259 return r; 03260 } 03261 03262 xfree(sbuf); 03263 return 0; 03264 } 03265 03266 static int 03267 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, 03268 regex_t* reg) 03269 { 03270 int r; 03271 Node *node; 03272 03273 node = onig_node_new_str(s, end); 03274 if (IS_NULL(node)) return ONIGERR_MEMORY; 03275 03276 r = update_string_node_case_fold(reg, node); 03277 if (r != 0) { 03278 onig_node_free(node); 03279 return r; 03280 } 03281 03282 NSTRING_SET_AMBIG(node); 03283 NSTRING_SET_DONT_GET_OPT_INFO(node); 03284 *rnode = node; 03285 return 0; 03286 } 03287 03288 static int 03289 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], 03290 UChar *p, int slen, UChar *end, 03291 regex_t* reg, Node **rnode) 03292 { 03293 int r, i, j, len, varlen; 03294 Node *anode, *var_anode, *snode, *xnode, *an; 03295 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; 03296 03297 *rnode = var_anode = NULL_NODE; 03298 03299 varlen = 0; 03300 for (i = 0; i < item_num; i++) { 03301 if (items[i].byte_len != slen) { 03302 varlen = 1; 03303 break; 03304 } 03305 } 03306 03307 if (varlen != 0) { 03308 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 03309 if (IS_NULL(var_anode)) return ONIGERR_MEMORY; 03310 03311 xnode = onig_node_new_list(NULL, NULL); 03312 if (IS_NULL(xnode)) goto mem_err; 03313 NCAR(var_anode) = xnode; 03314 03315 anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 03316 if (IS_NULL(anode)) goto mem_err; 03317 NCAR(xnode) = anode; 03318 } 03319 else { 03320 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 03321 if (IS_NULL(anode)) return ONIGERR_MEMORY; 03322 } 03323 03324 snode = onig_node_new_str(p, p + slen); 03325 if (IS_NULL(snode)) goto mem_err; 03326 03327 NCAR(anode) = snode; 03328 03329 for (i = 0; i < item_num; i++) { 03330 snode = onig_node_new_str(NULL, NULL); 03331 if (IS_NULL(snode)) goto mem_err; 03332 03333 for (j = 0; j < items[i].code_len; j++) { 03334 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); 03335 if (len < 0) { 03336 r = len; 03337 goto mem_err2; 03338 } 03339 03340 r = onig_node_str_cat(snode, buf, buf + len); 03341 if (r != 0) goto mem_err2; 03342 } 03343 03344 an = onig_node_new_alt(NULL_NODE, NULL_NODE); 03345 if (IS_NULL(an)) { 03346 goto mem_err2; 03347 } 03348 03349 if (items[i].byte_len != slen) { 03350 Node *rem; 03351 UChar *q = p + items[i].byte_len; 03352 03353 if (q < end) { 03354 r = expand_case_fold_make_rem_string(&rem, q, end, reg); 03355 if (r != 0) { 03356 onig_node_free(an); 03357 goto mem_err2; 03358 } 03359 03360 xnode = onig_node_list_add(NULL_NODE, snode); 03361 if (IS_NULL(xnode)) { 03362 onig_node_free(an); 03363 onig_node_free(rem); 03364 goto mem_err2; 03365 } 03366 if (IS_NULL(onig_node_list_add(xnode, rem))) { 03367 onig_node_free(an); 03368 onig_node_free(xnode); 03369 onig_node_free(rem); 03370 goto mem_err; 03371 } 03372 03373 NCAR(an) = xnode; 03374 } 03375 else { 03376 NCAR(an) = snode; 03377 } 03378 03379 NCDR(var_anode) = an; 03380 var_anode = an; 03381 } 03382 else { 03383 NCAR(an) = snode; 03384 NCDR(anode) = an; 03385 anode = an; 03386 } 03387 } 03388 03389 return varlen; 03390 03391 mem_err2: 03392 onig_node_free(snode); 03393 03394 mem_err: 03395 onig_node_free(*rnode); 03396 03397 return ONIGERR_MEMORY; 03398 } 03399 03400 static int 03401 expand_case_fold_string(Node* node, regex_t* reg) 03402 { 03403 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 03404 03405 int r, n, len, alt_num; 03406 UChar *start, *end, *p; 03407 Node *top_root, *root, *snode, *prev_node; 03408 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 03409 StrNode* sn = NSTR(node); 03410 03411 if (NSTRING_IS_AMBIG(node)) return 0; 03412 03413 start = sn->s; 03414 end = sn->end; 03415 if (start >= end) return 0; 03416 03417 r = 0; 03418 top_root = root = prev_node = snode = NULL_NODE; 03419 alt_num = 1; 03420 p = start; 03421 while (p < end) { 03422 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, 03423 p, end, items); 03424 if (n < 0) { 03425 r = n; 03426 goto err; 03427 } 03428 03429 len = enclen(reg->enc, p, end); 03430 03431 if (n == 0) { 03432 if (IS_NULL(snode)) { 03433 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { 03434 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 03435 if (IS_NULL(root)) { 03436 onig_node_free(prev_node); 03437 goto mem_err; 03438 } 03439 } 03440 03441 prev_node = snode = onig_node_new_str(NULL, NULL); 03442 if (IS_NULL(snode)) goto mem_err; 03443 if (IS_NOT_NULL(root)) { 03444 if (IS_NULL(onig_node_list_add(root, snode))) { 03445 onig_node_free(snode); 03446 goto mem_err; 03447 } 03448 } 03449 } 03450 03451 r = onig_node_str_cat(snode, p, p + len); 03452 if (r != 0) goto err; 03453 } 03454 else { 03455 alt_num *= (n + 1); 03456 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; 03457 03458 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { 03459 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 03460 if (IS_NULL(root)) { 03461 onig_node_free(prev_node); 03462 goto mem_err; 03463 } 03464 } 03465 03466 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); 03467 if (r < 0) goto mem_err; 03468 if (r == 1) { 03469 if (IS_NULL(root)) { 03470 top_root = prev_node; 03471 } 03472 else { 03473 if (IS_NULL(onig_node_list_add(root, prev_node))) { 03474 onig_node_free(prev_node); 03475 goto mem_err; 03476 } 03477 } 03478 03479 root = NCAR(prev_node); 03480 } 03481 else { /* r == 0 */ 03482 if (IS_NOT_NULL(root)) { 03483 if (IS_NULL(onig_node_list_add(root, prev_node))) { 03484 onig_node_free(prev_node); 03485 goto mem_err; 03486 } 03487 } 03488 } 03489 03490 snode = NULL_NODE; 03491 } 03492 03493 p += len; 03494 } 03495 03496 if (p < end) { 03497 Node *srem; 03498 03499 r = expand_case_fold_make_rem_string(&srem, p, end, reg); 03500 if (r != 0) goto mem_err; 03501 03502 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { 03503 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 03504 if (IS_NULL(root)) { 03505 onig_node_free(srem); 03506 onig_node_free(prev_node); 03507 goto mem_err; 03508 } 03509 } 03510 03511 if (IS_NULL(root)) { 03512 prev_node = srem; 03513 } 03514 else { 03515 if (IS_NULL(onig_node_list_add(root, srem))) { 03516 onig_node_free(srem); 03517 goto mem_err; 03518 } 03519 } 03520 } 03521 03522 /* ending */ 03523 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node); 03524 swap_node(node, top_root); 03525 onig_node_free(top_root); 03526 return 0; 03527 03528 mem_err: 03529 r = ONIGERR_MEMORY; 03530 03531 err: 03532 onig_node_free(top_root); 03533 return r; 03534 } 03535 03536 03537 #ifdef USE_COMBINATION_EXPLOSION_CHECK 03538 03539 #define CEC_THRES_NUM_BIG_REPEAT 512 03540 #define CEC_INFINITE_NUM 0x7fffffff 03541 03542 #define CEC_IN_INFINITE_REPEAT (1<<0) 03543 #define CEC_IN_FINITE_REPEAT (1<<1) 03544 #define CEC_CONT_BIG_REPEAT (1<<2) 03545 03546 static int 03547 setup_comb_exp_check(Node* node, int state, ScanEnv* env) 03548 { 03549 int type; 03550 int r = state; 03551 03552 type = NTYPE(node); 03553 switch (type) { 03554 case NT_LIST: 03555 { 03556 Node* prev = NULL_NODE; 03557 do { 03558 r = setup_comb_exp_check(NCAR(node), r, env); 03559 prev = NCAR(node); 03560 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); 03561 } 03562 break; 03563 03564 case NT_ALT: 03565 { 03566 int ret; 03567 do { 03568 ret = setup_comb_exp_check(NCAR(node), state, env); 03569 r |= ret; 03570 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); 03571 } 03572 break; 03573 03574 case NT_QTFR: 03575 { 03576 int child_state = state; 03577 int add_state = 0; 03578 QtfrNode* qn = NQTFR(node); 03579 Node* target = qn->target; 03580 int var_num; 03581 03582 if (! IS_REPEAT_INFINITE(qn->upper)) { 03583 if (qn->upper > 1) { 03584 /* {0,1}, {1,1} are allowed */ 03585 child_state |= CEC_IN_FINITE_REPEAT; 03586 03587 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ 03588 if (env->backrefed_mem == 0) { 03589 if (NTYPE(qn->target) == NT_ENCLOSE) { 03590 EncloseNode* en = NENCLOSE(qn->target); 03591 if (en->type == ENCLOSE_MEMORY) { 03592 if (NTYPE(en->target) == NT_QTFR) { 03593 QtfrNode* q = NQTFR(en->target); 03594 if (IS_REPEAT_INFINITE(q->upper) 03595 && q->greedy == qn->greedy) { 03596 qn->upper = (qn->lower == 0 ? 1 : qn->lower); 03597 if (qn->upper == 1) 03598 child_state = state; 03599 } 03600 } 03601 } 03602 } 03603 } 03604 } 03605 } 03606 03607 if (state & CEC_IN_FINITE_REPEAT) { 03608 qn->comb_exp_check_num = -1; 03609 } 03610 else { 03611 if (IS_REPEAT_INFINITE(qn->upper)) { 03612 var_num = CEC_INFINITE_NUM; 03613 child_state |= CEC_IN_INFINITE_REPEAT; 03614 } 03615 else { 03616 var_num = qn->upper - qn->lower; 03617 } 03618 03619 if (var_num >= CEC_THRES_NUM_BIG_REPEAT) 03620 add_state |= CEC_CONT_BIG_REPEAT; 03621 03622 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || 03623 ((state & CEC_CONT_BIG_REPEAT) != 0 && 03624 var_num >= CEC_THRES_NUM_BIG_REPEAT)) { 03625 if (qn->comb_exp_check_num == 0) { 03626 env->num_comb_exp_check++; 03627 qn->comb_exp_check_num = env->num_comb_exp_check; 03628 if (env->curr_max_regnum > env->comb_exp_max_regnum) 03629 env->comb_exp_max_regnum = env->curr_max_regnum; 03630 } 03631 } 03632 } 03633 03634 r = setup_comb_exp_check(target, child_state, env); 03635 r |= add_state; 03636 } 03637 break; 03638 03639 case NT_ENCLOSE: 03640 { 03641 EncloseNode* en = NENCLOSE(node); 03642 03643 switch (en->type) { 03644 case ENCLOSE_MEMORY: 03645 { 03646 if (env->curr_max_regnum < en->regnum) 03647 env->curr_max_regnum = en->regnum; 03648 03649 r = setup_comb_exp_check(en->target, state, env); 03650 } 03651 break; 03652 03653 default: 03654 r = setup_comb_exp_check(en->target, state, env); 03655 break; 03656 } 03657 } 03658 break; 03659 03660 #ifdef USE_SUBEXP_CALL 03661 case NT_CALL: 03662 if (IS_CALL_RECURSION(NCALL(node))) 03663 env->has_recursion = 1; 03664 else 03665 r = setup_comb_exp_check(NCALL(node)->target, state, env); 03666 break; 03667 #endif 03668 03669 default: 03670 break; 03671 } 03672 03673 return r; 03674 } 03675 #endif 03676 03677 #define IN_ALT (1<<0) 03678 #define IN_NOT (1<<1) 03679 #define IN_REPEAT (1<<2) 03680 #define IN_VAR_REPEAT (1<<3) 03681 03682 /* setup_tree does the following work. 03683 1. check empty loop. (set qn->target_empty_info) 03684 2. expand ignore-case in char class. 03685 3. set memory status bit flags. (reg->mem_stats) 03686 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. 03687 5. find invalid patterns in look-behind. 03688 6. expand repeated string. 03689 */ 03690 static int 03691 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) 03692 { 03693 int type; 03694 int r = 0; 03695 03696 restart: 03697 type = NTYPE(node); 03698 switch (type) { 03699 case NT_LIST: 03700 { 03701 Node* prev = NULL_NODE; 03702 do { 03703 r = setup_tree(NCAR(node), reg, state, env); 03704 if (IS_NOT_NULL(prev) && r == 0) { 03705 r = next_setup(prev, NCAR(node), reg); 03706 } 03707 prev = NCAR(node); 03708 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03709 } 03710 break; 03711 03712 case NT_ALT: 03713 do { 03714 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); 03715 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03716 break; 03717 03718 case NT_CCLASS: 03719 break; 03720 03721 case NT_STR: 03722 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { 03723 r = expand_case_fold_string(node, reg); 03724 } 03725 break; 03726 03727 case NT_CTYPE: 03728 case NT_CANY: 03729 break; 03730 03731 #ifdef USE_SUBEXP_CALL 03732 case NT_CALL: 03733 break; 03734 #endif 03735 03736 case NT_BREF: 03737 { 03738 int i; 03739 int* p; 03740 Node** nodes = SCANENV_MEM_NODES(env); 03741 BRefNode* br = NBREF(node); 03742 p = BACKREFS_P(br); 03743 for (i = 0; i < br->back_num; i++) { 03744 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 03745 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); 03746 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); 03747 #ifdef USE_BACKREF_WITH_LEVEL 03748 if (IS_BACKREF_NEST_LEVEL(br)) { 03749 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); 03750 } 03751 #endif 03752 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); 03753 } 03754 } 03755 break; 03756 03757 case NT_QTFR: 03758 { 03759 OnigDistance d; 03760 QtfrNode* qn = NQTFR(node); 03761 Node* target = qn->target; 03762 03763 if ((state & IN_REPEAT) != 0) { 03764 qn->state |= NST_IN_REPEAT; 03765 } 03766 03767 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { 03768 r = get_min_match_length(target, &d, env); 03769 if (r) break; 03770 if (d == 0) { 03771 qn->target_empty_info = NQ_TARGET_IS_EMPTY; 03772 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 03773 r = quantifiers_memory_node_info(target); 03774 if (r < 0) break; 03775 if (r > 0) { 03776 qn->target_empty_info = r; 03777 } 03778 #endif 03779 #if 0 03780 r = get_max_match_length(target, &d, env); 03781 if (r == 0 && d == 0) { 03782 /* ()* ==> ()?, ()+ ==> () */ 03783 qn->upper = 1; 03784 if (qn->lower > 1) qn->lower = 1; 03785 if (NTYPE(target) == NT_STR) { 03786 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ 03787 } 03788 } 03789 #endif 03790 } 03791 } 03792 03793 state |= IN_REPEAT; 03794 if (qn->lower != qn->upper) 03795 state |= IN_VAR_REPEAT; 03796 r = setup_tree(target, reg, state, env); 03797 if (r) break; 03798 03799 /* expand string */ 03800 #define EXPAND_STRING_MAX_LENGTH 100 03801 if (NTYPE(target) == NT_STR) { 03802 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && 03803 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { 03804 OnigDistance len = NSTRING_LEN(target); 03805 StrNode* sn = NSTR(target); 03806 03807 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { 03808 int i, n = qn->lower; 03809 onig_node_conv_to_str_node(node, NSTR(target)->flag); 03810 for (i = 0; i < n; i++) { 03811 r = onig_node_str_cat(node, sn->s, sn->end); 03812 if (r) break; 03813 } 03814 onig_node_free(target); 03815 break; /* break case NT_QTFR: */ 03816 } 03817 } 03818 } 03819 03820 #ifdef USE_OP_PUSH_OR_JUMP_EXACT 03821 if (qn->greedy && (qn->target_empty_info != 0)) { 03822 if (NTYPE(target) == NT_QTFR) { 03823 QtfrNode* tqn = NQTFR(target); 03824 if (IS_NOT_NULL(tqn->head_exact)) { 03825 qn->head_exact = tqn->head_exact; 03826 tqn->head_exact = NULL; 03827 } 03828 } 03829 else { 03830 qn->head_exact = get_head_value_node(qn->target, 1, reg); 03831 } 03832 } 03833 #endif 03834 } 03835 break; 03836 03837 case NT_ENCLOSE: 03838 { 03839 EncloseNode* en = NENCLOSE(node); 03840 03841 switch (en->type) { 03842 case ENCLOSE_OPTION: 03843 { 03844 OnigOptionType options = reg->options; 03845 reg->options = NENCLOSE(node)->option; 03846 r = setup_tree(NENCLOSE(node)->target, reg, state, env); 03847 reg->options = options; 03848 } 03849 break; 03850 03851 case ENCLOSE_MEMORY: 03852 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { 03853 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); 03854 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ 03855 } 03856 r = setup_tree(en->target, reg, state, env); 03857 break; 03858 03859 case ENCLOSE_STOP_BACKTRACK: 03860 { 03861 Node* target = en->target; 03862 r = setup_tree(target, reg, state, env); 03863 if (NTYPE(target) == NT_QTFR) { 03864 QtfrNode* tqn = NQTFR(target); 03865 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && 03866 tqn->greedy != 0) { /* (?>a*), a*+ etc... */ 03867 int qtype = NTYPE(tqn->target); 03868 if (IS_NODE_TYPE_SIMPLE(qtype)) 03869 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); 03870 } 03871 } 03872 } 03873 break; 03874 } 03875 } 03876 break; 03877 03878 case NT_ANCHOR: 03879 { 03880 AnchorNode* an = NANCHOR(node); 03881 03882 switch (an->type) { 03883 case ANCHOR_PREC_READ: 03884 r = setup_tree(an->target, reg, state, env); 03885 break; 03886 case ANCHOR_PREC_READ_NOT: 03887 r = setup_tree(an->target, reg, (state | IN_NOT), env); 03888 break; 03889 03890 /* allowed node types in look-behind */ 03891 #define ALLOWED_TYPE_IN_LB \ 03892 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ 03893 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) 03894 03895 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) 03896 #define ALLOWED_ENCLOSE_IN_LB_NOT 0 03897 03898 #define ALLOWED_ANCHOR_IN_LB \ 03899 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) 03900 #define ALLOWED_ANCHOR_IN_LB_NOT \ 03901 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) 03902 03903 case ANCHOR_LOOK_BEHIND: 03904 { 03905 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, 03906 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); 03907 if (r < 0) return r; 03908 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 03909 r = setup_look_behind(node, reg, env); 03910 if (r != 0) return r; 03911 if (NTYPE(node) != NT_ANCHOR) goto restart; 03912 r = setup_tree(an->target, reg, state, env); 03913 } 03914 break; 03915 03916 case ANCHOR_LOOK_BEHIND_NOT: 03917 { 03918 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, 03919 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); 03920 if (r < 0) return r; 03921 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 03922 r = setup_look_behind(node, reg, env); 03923 if (r != 0) return r; 03924 if (NTYPE(node) != NT_ANCHOR) goto restart; 03925 r = setup_tree(an->target, reg, (state | IN_NOT), env); 03926 } 03927 break; 03928 } 03929 } 03930 break; 03931 03932 default: 03933 break; 03934 } 03935 03936 return r; 03937 } 03938 03939 /* set skip map for Boyer-Moor search */ 03940 static int 03941 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, 03942 UChar skip[], int** int_skip) 03943 { 03944 OnigDistance i, len; 03945 03946 len = end - s; 03947 if (len < ONIG_CHAR_TABLE_SIZE) { 03948 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; 03949 03950 for (i = 0; i < len - 1; i++) 03951 skip[s[i]] = len - 1 - i; 03952 } 03953 else { 03954 if (IS_NULL(*int_skip)) { 03955 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); 03956 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; 03957 } 03958 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int)len; 03959 03960 for (i = 0; i < len - 1; i++) 03961 (*int_skip)[s[i]] = (int)(len - 1 - i); 03962 } 03963 return 0; 03964 } 03965 03966 #define OPT_EXACT_MAXLEN 24 03967 03968 typedef struct { 03969 OnigDistance min; /* min byte length */ 03970 OnigDistance max; /* max byte length */ 03971 } MinMaxLen; 03972 03973 typedef struct { 03974 MinMaxLen mmd; 03975 OnigEncoding enc; 03976 OnigOptionType options; 03977 OnigCaseFoldType case_fold_flag; 03978 ScanEnv* scan_env; 03979 } OptEnv; 03980 03981 typedef struct { 03982 int left_anchor; 03983 int right_anchor; 03984 } OptAncInfo; 03985 03986 typedef struct { 03987 MinMaxLen mmd; /* info position */ 03988 OptAncInfo anc; 03989 03990 int reach_end; 03991 int ignore_case; 03992 int len; 03993 UChar s[OPT_EXACT_MAXLEN]; 03994 } OptExactInfo; 03995 03996 typedef struct { 03997 MinMaxLen mmd; /* info position */ 03998 OptAncInfo anc; 03999 04000 int value; /* weighted value */ 04001 UChar map[ONIG_CHAR_TABLE_SIZE]; 04002 } OptMapInfo; 04003 04004 typedef struct { 04005 MinMaxLen len; 04006 04007 OptAncInfo anc; 04008 OptExactInfo exb; /* boundary */ 04009 OptExactInfo exm; /* middle */ 04010 OptExactInfo expr; /* prec read (?=...) */ 04011 04012 OptMapInfo map; /* boundary */ 04013 } NodeOptInfo; 04014 04015 04016 static int 04017 map_position_value(OnigEncoding enc, int i) 04018 { 04019 static const short int ByteValTable[] = { 04020 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, 04021 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 04022 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 04023 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 04024 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 04025 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, 04026 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 04027 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 04028 }; 04029 04030 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { 04031 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) 04032 return 20; 04033 else 04034 return (int )ByteValTable[i]; 04035 } 04036 else 04037 return 4; /* Take it easy. */ 04038 } 04039 04040 static int 04041 distance_value(MinMaxLen* mm) 04042 { 04043 /* 1000 / (min-max-dist + 1) */ 04044 static const short int dist_vals[] = { 04045 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, 04046 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, 04047 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, 04048 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, 04049 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 04050 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 04051 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 04052 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 04053 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 04054 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 04055 }; 04056 04057 OnigDistance d; 04058 04059 if (mm->max == ONIG_INFINITE_DISTANCE) return 0; 04060 04061 d = mm->max - mm->min; 04062 if (d < sizeof(dist_vals)/sizeof(dist_vals[0])) 04063 /* return dist_vals[d] * 16 / (mm->min + 12); */ 04064 return (int )dist_vals[d]; 04065 else 04066 return 1; 04067 } 04068 04069 static int 04070 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) 04071 { 04072 if (v2 <= 0) return -1; 04073 if (v1 <= 0) return 1; 04074 04075 v1 *= distance_value(d1); 04076 v2 *= distance_value(d2); 04077 04078 if (v2 > v1) return 1; 04079 if (v2 < v1) return -1; 04080 04081 if (d2->min < d1->min) return 1; 04082 if (d2->min > d1->min) return -1; 04083 return 0; 04084 } 04085 04086 static int 04087 is_equal_mml(MinMaxLen* a, MinMaxLen* b) 04088 { 04089 return (a->min == b->min && a->max == b->max) ? 1 : 0; 04090 } 04091 04092 04093 static void 04094 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) 04095 { 04096 mml->min = min; 04097 mml->max = max; 04098 } 04099 04100 static void 04101 clear_mml(MinMaxLen* mml) 04102 { 04103 mml->min = mml->max = 0; 04104 } 04105 04106 static void 04107 copy_mml(MinMaxLen* to, MinMaxLen* from) 04108 { 04109 to->min = from->min; 04110 to->max = from->max; 04111 } 04112 04113 static void 04114 add_mml(MinMaxLen* to, MinMaxLen* from) 04115 { 04116 to->min = distance_add(to->min, from->min); 04117 to->max = distance_add(to->max, from->max); 04118 } 04119 04120 #if 0 04121 static void 04122 add_len_mml(MinMaxLen* to, OnigDistance len) 04123 { 04124 to->min = distance_add(to->min, len); 04125 to->max = distance_add(to->max, len); 04126 } 04127 #endif 04128 04129 static void 04130 alt_merge_mml(MinMaxLen* to, MinMaxLen* from) 04131 { 04132 if (to->min > from->min) to->min = from->min; 04133 if (to->max < from->max) to->max = from->max; 04134 } 04135 04136 static void 04137 copy_opt_env(OptEnv* to, OptEnv* from) 04138 { 04139 *to = *from; 04140 } 04141 04142 static void 04143 clear_opt_anc_info(OptAncInfo* anc) 04144 { 04145 anc->left_anchor = 0; 04146 anc->right_anchor = 0; 04147 } 04148 04149 static void 04150 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) 04151 { 04152 *to = *from; 04153 } 04154 04155 static void 04156 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, 04157 OnigDistance left_len, OnigDistance right_len) 04158 { 04159 clear_opt_anc_info(to); 04160 04161 to->left_anchor = left->left_anchor; 04162 if (left_len == 0) { 04163 to->left_anchor |= right->left_anchor; 04164 } 04165 04166 to->right_anchor = right->right_anchor; 04167 if (right_len == 0) { 04168 to->right_anchor |= left->right_anchor; 04169 } 04170 } 04171 04172 static int 04173 is_left_anchor(int anc) 04174 { 04175 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || 04176 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || 04177 anc == ANCHOR_PREC_READ_NOT) 04178 return 0; 04179 04180 return 1; 04181 } 04182 04183 static int 04184 is_set_opt_anc_info(OptAncInfo* to, int anc) 04185 { 04186 if ((to->left_anchor & anc) != 0) return 1; 04187 04188 return ((to->right_anchor & anc) != 0 ? 1 : 0); 04189 } 04190 04191 static void 04192 add_opt_anc_info(OptAncInfo* to, int anc) 04193 { 04194 if (is_left_anchor(anc)) 04195 to->left_anchor |= anc; 04196 else 04197 to->right_anchor |= anc; 04198 } 04199 04200 static void 04201 remove_opt_anc_info(OptAncInfo* to, int anc) 04202 { 04203 if (is_left_anchor(anc)) 04204 to->left_anchor &= ~anc; 04205 else 04206 to->right_anchor &= ~anc; 04207 } 04208 04209 static void 04210 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) 04211 { 04212 to->left_anchor &= add->left_anchor; 04213 to->right_anchor &= add->right_anchor; 04214 } 04215 04216 static int 04217 is_full_opt_exact_info(OptExactInfo* ex) 04218 { 04219 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); 04220 } 04221 04222 static void 04223 clear_opt_exact_info(OptExactInfo* ex) 04224 { 04225 clear_mml(&ex->mmd); 04226 clear_opt_anc_info(&ex->anc); 04227 ex->reach_end = 0; 04228 ex->ignore_case = 0; 04229 ex->len = 0; 04230 ex->s[0] = '\0'; 04231 } 04232 04233 static void 04234 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) 04235 { 04236 *to = *from; 04237 } 04238 04239 static void 04240 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) 04241 { 04242 int i, j, len; 04243 UChar *p, *end; 04244 OptAncInfo tanc; 04245 04246 if (! to->ignore_case && add->ignore_case) { 04247 if (to->len >= add->len) return ; /* avoid */ 04248 04249 to->ignore_case = 1; 04250 } 04251 04252 p = add->s; 04253 end = p + add->len; 04254 for (i = to->len; p < end; ) { 04255 len = enclen(enc, p, end); 04256 if (i + len > OPT_EXACT_MAXLEN) break; 04257 for (j = 0; j < len && p < end; j++) 04258 to->s[i++] = *p++; 04259 } 04260 04261 to->len = i; 04262 to->reach_end = (p == end ? add->reach_end : 0); 04263 04264 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); 04265 if (! to->reach_end) tanc.right_anchor = 0; 04266 copy_opt_anc_info(&to->anc, &tanc); 04267 } 04268 04269 static void 04270 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, 04271 int raw ARG_UNUSED, OnigEncoding enc) 04272 { 04273 int i, j, len; 04274 UChar *p; 04275 04276 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { 04277 len = enclen(enc, p, end); 04278 if (i + len > OPT_EXACT_MAXLEN) break; 04279 for (j = 0; j < len && p < end; j++) 04280 to->s[i++] = *p++; 04281 } 04282 04283 to->len = i; 04284 } 04285 04286 static void 04287 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) 04288 { 04289 int i, j, len; 04290 04291 if (add->len == 0 || to->len == 0) { 04292 clear_opt_exact_info(to); 04293 return ; 04294 } 04295 04296 if (! is_equal_mml(&to->mmd, &add->mmd)) { 04297 clear_opt_exact_info(to); 04298 return ; 04299 } 04300 04301 for (i = 0; i < to->len && i < add->len; ) { 04302 if (to->s[i] != add->s[i]) break; 04303 len = enclen(env->enc, to->s + i, to->s + to->len); 04304 04305 for (j = 1; j < len; j++) { 04306 if (to->s[i+j] != add->s[i+j]) break; 04307 } 04308 if (j < len) break; 04309 i += len; 04310 } 04311 04312 if (! add->reach_end || i < add->len || i < to->len) { 04313 to->reach_end = 0; 04314 } 04315 to->len = i; 04316 to->ignore_case |= add->ignore_case; 04317 04318 alt_merge_opt_anc_info(&to->anc, &add->anc); 04319 if (! to->reach_end) to->anc.right_anchor = 0; 04320 } 04321 04322 static void 04323 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) 04324 { 04325 int v1, v2; 04326 04327 v1 = now->len; 04328 v2 = alt->len; 04329 04330 if (v2 == 0) { 04331 return ; 04332 } 04333 else if (v1 == 0) { 04334 copy_opt_exact_info(now, alt); 04335 return ; 04336 } 04337 else if (v1 <= 2 && v2 <= 2) { 04338 /* ByteValTable[x] is big value --> low price */ 04339 v2 = map_position_value(enc, now->s[0]); 04340 v1 = map_position_value(enc, alt->s[0]); 04341 04342 if (now->len > 1) v1 += 5; 04343 if (alt->len > 1) v2 += 5; 04344 } 04345 04346 if (now->ignore_case == 0) v1 *= 2; 04347 if (alt->ignore_case == 0) v2 *= 2; 04348 04349 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) 04350 copy_opt_exact_info(now, alt); 04351 } 04352 04353 static void 04354 clear_opt_map_info(OptMapInfo* map) 04355 { 04356 static const OptMapInfo clean_info = { 04357 {0, 0}, {0, 0}, 0, 04358 { 04359 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04360 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04362 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04363 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04364 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04365 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04366 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04367 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04368 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04369 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04370 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04371 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04372 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04373 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04374 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 04375 } 04376 }; 04377 04378 xmemcpy(map, &clean_info, sizeof(OptMapInfo)); 04379 } 04380 04381 static void 04382 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) 04383 { 04384 *to = *from; 04385 } 04386 04387 static void 04388 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) 04389 { 04390 if (map->map[c] == 0) { 04391 map->map[c] = 1; 04392 map->value += map_position_value(enc, c); 04393 } 04394 } 04395 04396 static int 04397 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, 04398 OnigEncoding enc, OnigCaseFoldType case_fold_flag) 04399 { 04400 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 04401 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; 04402 int i, n; 04403 04404 add_char_opt_map_info(map, p[0], enc); 04405 04406 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); 04407 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); 04408 if (n < 0) return n; 04409 04410 for (i = 0; i < n; i++) { 04411 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); 04412 add_char_opt_map_info(map, buf[0], enc); 04413 } 04414 04415 return 0; 04416 } 04417 04418 static void 04419 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) 04420 { 04421 const int z = 1<<15; /* 32768: something big value */ 04422 04423 int v1, v2; 04424 04425 if (alt->value == 0) return ; 04426 if (now->value == 0) { 04427 copy_opt_map_info(now, alt); 04428 return ; 04429 } 04430 04431 v1 = z / now->value; 04432 v2 = z / alt->value; 04433 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) 04434 copy_opt_map_info(now, alt); 04435 } 04436 04437 static int 04438 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) 04439 { 04440 #define COMP_EM_BASE 20 04441 int ve, vm; 04442 04443 if (m->value <= 0) return -1; 04444 04445 ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); 04446 vm = COMP_EM_BASE * 5 * 2 / m->value; 04447 return comp_distance_value(&e->mmd, &m->mmd, ve, vm); 04448 } 04449 04450 static void 04451 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) 04452 { 04453 int i, val; 04454 04455 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */ 04456 if (to->value == 0) return ; 04457 if (add->value == 0 || to->mmd.max < add->mmd.min) { 04458 clear_opt_map_info(to); 04459 return ; 04460 } 04461 04462 alt_merge_mml(&to->mmd, &add->mmd); 04463 04464 val = 0; 04465 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { 04466 if (add->map[i]) 04467 to->map[i] = 1; 04468 04469 if (to->map[i]) 04470 val += map_position_value(enc, i); 04471 } 04472 to->value = val; 04473 04474 alt_merge_opt_anc_info(&to->anc, &add->anc); 04475 } 04476 04477 static void 04478 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) 04479 { 04480 copy_mml(&(opt->exb.mmd), mmd); 04481 copy_mml(&(opt->expr.mmd), mmd); 04482 copy_mml(&(opt->map.mmd), mmd); 04483 } 04484 04485 static void 04486 clear_node_opt_info(NodeOptInfo* opt) 04487 { 04488 clear_mml(&opt->len); 04489 clear_opt_anc_info(&opt->anc); 04490 clear_opt_exact_info(&opt->exb); 04491 clear_opt_exact_info(&opt->exm); 04492 clear_opt_exact_info(&opt->expr); 04493 clear_opt_map_info(&opt->map); 04494 } 04495 04496 static void 04497 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) 04498 { 04499 *to = *from; 04500 } 04501 04502 static void 04503 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) 04504 { 04505 int exb_reach, exm_reach; 04506 OptAncInfo tanc; 04507 04508 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); 04509 copy_opt_anc_info(&to->anc, &tanc); 04510 04511 if (add->exb.len > 0 && to->len.max == 0) { 04512 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, 04513 to->len.max, add->len.max); 04514 copy_opt_anc_info(&add->exb.anc, &tanc); 04515 } 04516 04517 if (add->map.value > 0 && to->len.max == 0) { 04518 if (add->map.mmd.max == 0) 04519 add->map.anc.left_anchor |= to->anc.left_anchor; 04520 } 04521 04522 exb_reach = to->exb.reach_end; 04523 exm_reach = to->exm.reach_end; 04524 04525 if (add->len.max != 0) 04526 to->exb.reach_end = to->exm.reach_end = 0; 04527 04528 if (add->exb.len > 0) { 04529 if (exb_reach) { 04530 concat_opt_exact_info(&to->exb, &add->exb, enc); 04531 clear_opt_exact_info(&add->exb); 04532 } 04533 else if (exm_reach) { 04534 concat_opt_exact_info(&to->exm, &add->exb, enc); 04535 clear_opt_exact_info(&add->exb); 04536 } 04537 } 04538 select_opt_exact_info(enc, &to->exm, &add->exb); 04539 select_opt_exact_info(enc, &to->exm, &add->exm); 04540 04541 if (to->expr.len > 0) { 04542 if (add->len.max > 0) { 04543 if (to->expr.len > (int )add->len.max) 04544 to->expr.len = (int)add->len.max; 04545 04546 if (to->expr.mmd.max == 0) 04547 select_opt_exact_info(enc, &to->exb, &to->expr); 04548 else 04549 select_opt_exact_info(enc, &to->exm, &to->expr); 04550 } 04551 } 04552 else if (add->expr.len > 0) { 04553 copy_opt_exact_info(&to->expr, &add->expr); 04554 } 04555 04556 select_opt_map_info(&to->map, &add->map); 04557 04558 add_mml(&to->len, &add->len); 04559 } 04560 04561 static void 04562 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) 04563 { 04564 alt_merge_opt_anc_info (&to->anc, &add->anc); 04565 alt_merge_opt_exact_info(&to->exb, &add->exb, env); 04566 alt_merge_opt_exact_info(&to->exm, &add->exm, env); 04567 alt_merge_opt_exact_info(&to->expr, &add->expr, env); 04568 alt_merge_opt_map_info(env->enc, &to->map, &add->map); 04569 04570 alt_merge_mml(&to->len, &add->len); 04571 } 04572 04573 04574 #define MAX_NODE_OPT_INFO_REF_COUNT 5 04575 04576 static int 04577 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) 04578 { 04579 int type; 04580 int r = 0; 04581 04582 clear_node_opt_info(opt); 04583 set_bound_node_opt_info(opt, &env->mmd); 04584 04585 type = NTYPE(node); 04586 switch (type) { 04587 case NT_LIST: 04588 { 04589 OptEnv nenv; 04590 NodeOptInfo nopt; 04591 Node* nd = node; 04592 04593 copy_opt_env(&nenv, env); 04594 do { 04595 r = optimize_node_left(NCAR(nd), &nopt, &nenv); 04596 if (r == 0) { 04597 add_mml(&nenv.mmd, &nopt.len); 04598 concat_left_node_opt_info(env->enc, opt, &nopt); 04599 } 04600 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); 04601 } 04602 break; 04603 04604 case NT_ALT: 04605 { 04606 NodeOptInfo nopt; 04607 Node* nd = node; 04608 04609 do { 04610 r = optimize_node_left(NCAR(nd), &nopt, env); 04611 if (r == 0) { 04612 if (nd == node) copy_node_opt_info(opt, &nopt); 04613 else alt_merge_node_opt_info(opt, &nopt, env); 04614 } 04615 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); 04616 } 04617 break; 04618 04619 case NT_STR: 04620 { 04621 StrNode* sn = NSTR(node); 04622 OnigDistance slen = sn->end - sn->s; 04623 int is_raw = NSTRING_IS_RAW(node); 04624 04625 if (! NSTRING_IS_AMBIG(node)) { 04626 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, 04627 NSTRING_IS_RAW(node), env->enc); 04628 if (slen > 0) { 04629 add_char_opt_map_info(&opt->map, *(sn->s), env->enc); 04630 } 04631 set_mml(&opt->len, slen, slen); 04632 } 04633 else { 04634 OnigDistance max; 04635 04636 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { 04637 int n = onigenc_strlen(env->enc, sn->s, sn->end); 04638 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; 04639 } 04640 else { 04641 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, 04642 is_raw, env->enc); 04643 opt->exb.ignore_case = 1; 04644 04645 if (slen > 0) { 04646 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, 04647 env->enc, env->case_fold_flag); 04648 if (r != 0) break; 04649 } 04650 04651 max = slen; 04652 } 04653 04654 set_mml(&opt->len, slen, max); 04655 } 04656 04657 if ((OnigDistance)opt->exb.len == slen) 04658 opt->exb.reach_end = 1; 04659 } 04660 break; 04661 04662 case NT_CCLASS: 04663 { 04664 int i, z; 04665 CClassNode* cc = NCCLASS(node); 04666 04667 /* no need to check ignore case. (setted in setup_tree()) */ 04668 04669 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { 04670 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); 04671 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 04672 04673 set_mml(&opt->len, min, max); 04674 } 04675 else { 04676 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 04677 z = BITSET_AT(cc->bs, i); 04678 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { 04679 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 04680 } 04681 } 04682 set_mml(&opt->len, 1, 1); 04683 } 04684 } 04685 break; 04686 04687 case NT_CTYPE: 04688 { 04689 int i, min, max; 04690 04691 max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 04692 04693 if (max == 1) { 04694 min = 1; 04695 04696 switch (NCTYPE(node)->ctype) { 04697 case ONIGENC_CTYPE_WORD: 04698 if (NCTYPE(node)->not != 0) { 04699 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 04700 if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { 04701 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 04702 } 04703 } 04704 } 04705 else { 04706 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 04707 if (ONIGENC_IS_CODE_WORD(env->enc, i)) { 04708 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 04709 } 04710 } 04711 } 04712 break; 04713 } 04714 } 04715 else { 04716 min = ONIGENC_MBC_MINLEN(env->enc); 04717 } 04718 set_mml(&opt->len, min, max); 04719 } 04720 break; 04721 04722 case NT_CANY: 04723 { 04724 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); 04725 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 04726 set_mml(&opt->len, min, max); 04727 } 04728 break; 04729 04730 case NT_ANCHOR: 04731 switch (NANCHOR(node)->type) { 04732 case ANCHOR_BEGIN_BUF: 04733 case ANCHOR_BEGIN_POSITION: 04734 case ANCHOR_BEGIN_LINE: 04735 case ANCHOR_END_BUF: 04736 case ANCHOR_SEMI_END_BUF: 04737 case ANCHOR_END_LINE: 04738 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */ 04739 add_opt_anc_info(&opt->anc, NANCHOR(node)->type); 04740 break; 04741 04742 case ANCHOR_PREC_READ: 04743 { 04744 NodeOptInfo nopt; 04745 04746 r = optimize_node_left(NANCHOR(node)->target, &nopt, env); 04747 if (r == 0) { 04748 if (nopt.exb.len > 0) 04749 copy_opt_exact_info(&opt->expr, &nopt.exb); 04750 else if (nopt.exm.len > 0) 04751 copy_opt_exact_info(&opt->expr, &nopt.exm); 04752 04753 opt->expr.reach_end = 0; 04754 04755 if (nopt.map.value > 0) 04756 copy_opt_map_info(&opt->map, &nopt.map); 04757 } 04758 } 04759 break; 04760 04761 case ANCHOR_PREC_READ_NOT: 04762 case ANCHOR_LOOK_BEHIND_NOT: 04763 break; 04764 } 04765 break; 04766 04767 case NT_BREF: 04768 { 04769 int i; 04770 int* backs; 04771 OnigDistance min, max, tmin, tmax; 04772 Node** nodes = SCANENV_MEM_NODES(env->scan_env); 04773 BRefNode* br = NBREF(node); 04774 04775 if (br->state & NST_RECURSION) { 04776 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); 04777 break; 04778 } 04779 backs = BACKREFS_P(br); 04780 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); 04781 if (r != 0) break; 04782 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); 04783 if (r != 0) break; 04784 for (i = 1; i < br->back_num; i++) { 04785 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); 04786 if (r != 0) break; 04787 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); 04788 if (r != 0) break; 04789 if (min > tmin) min = tmin; 04790 if (max < tmax) max = tmax; 04791 } 04792 if (r == 0) set_mml(&opt->len, min, max); 04793 } 04794 break; 04795 04796 #ifdef USE_SUBEXP_CALL 04797 case NT_CALL: 04798 if (IS_CALL_RECURSION(NCALL(node))) 04799 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); 04800 else { 04801 OnigOptionType save = env->options; 04802 env->options = NENCLOSE(NCALL(node)->target)->option; 04803 r = optimize_node_left(NCALL(node)->target, opt, env); 04804 env->options = save; 04805 } 04806 break; 04807 #endif 04808 04809 case NT_QTFR: 04810 { 04811 int i; 04812 OnigDistance min, max; 04813 NodeOptInfo nopt; 04814 QtfrNode* qn = NQTFR(node); 04815 04816 r = optimize_node_left(qn->target, &nopt, env); 04817 if (r) break; 04818 04819 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { 04820 if (env->mmd.max == 0 && 04821 NTYPE(qn->target) == NT_CANY && qn->greedy) { 04822 if (IS_MULTILINE(env->options)) 04823 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); 04824 else 04825 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); 04826 } 04827 } 04828 else { 04829 if (qn->lower > 0) { 04830 copy_node_opt_info(opt, &nopt); 04831 if (nopt.exb.len > 0) { 04832 if (nopt.exb.reach_end) { 04833 for (i = 2; i <= qn->lower && 04834 ! is_full_opt_exact_info(&opt->exb); i++) { 04835 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); 04836 } 04837 if (i < qn->lower) { 04838 opt->exb.reach_end = 0; 04839 } 04840 } 04841 } 04842 04843 if (qn->lower != qn->upper) { 04844 opt->exb.reach_end = 0; 04845 opt->exm.reach_end = 0; 04846 } 04847 if (qn->lower > 1) 04848 opt->exm.reach_end = 0; 04849 } 04850 } 04851 04852 min = distance_multiply(nopt.len.min, qn->lower); 04853 if (IS_REPEAT_INFINITE(qn->upper)) 04854 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); 04855 else 04856 max = distance_multiply(nopt.len.max, qn->upper); 04857 04858 set_mml(&opt->len, min, max); 04859 } 04860 break; 04861 04862 case NT_ENCLOSE: 04863 { 04864 EncloseNode* en = NENCLOSE(node); 04865 04866 switch (en->type) { 04867 case ENCLOSE_OPTION: 04868 { 04869 OnigOptionType save = env->options; 04870 04871 env->options = en->option; 04872 r = optimize_node_left(en->target, opt, env); 04873 env->options = save; 04874 } 04875 break; 04876 04877 case ENCLOSE_MEMORY: 04878 #ifdef USE_SUBEXP_CALL 04879 en->opt_count++; 04880 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { 04881 OnigDistance min, max; 04882 04883 min = 0; 04884 max = ONIG_INFINITE_DISTANCE; 04885 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; 04886 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; 04887 set_mml(&opt->len, min, max); 04888 } 04889 else 04890 #endif 04891 { 04892 r = optimize_node_left(en->target, opt, env); 04893 04894 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { 04895 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) 04896 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); 04897 } 04898 } 04899 break; 04900 04901 case ENCLOSE_STOP_BACKTRACK: 04902 r = optimize_node_left(en->target, opt, env); 04903 break; 04904 } 04905 } 04906 break; 04907 04908 default: 04909 #ifdef ONIG_DEBUG 04910 if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n", 04911 NTYPE(node)); 04912 #endif 04913 r = ONIGERR_TYPE_BUG; 04914 break; 04915 } 04916 04917 return r; 04918 } 04919 04920 static int 04921 set_optimize_exact_info(regex_t* reg, OptExactInfo* e) 04922 { 04923 int r; 04924 04925 if (e->len == 0) return 0; 04926 04927 if (e->ignore_case) { 04928 reg->exact = (UChar* )xmalloc(e->len); 04929 CHECK_NULL_RETURN_MEMERR(reg->exact); 04930 xmemcpy(reg->exact, e->s, e->len); 04931 reg->exact_end = reg->exact + e->len; 04932 reg->optimize = ONIG_OPTIMIZE_EXACT_IC; 04933 } 04934 else { 04935 int allow_reverse; 04936 04937 reg->exact = str_dup(e->s, e->s + e->len); 04938 CHECK_NULL_RETURN_MEMERR(reg->exact); 04939 reg->exact_end = reg->exact + e->len; 04940 04941 allow_reverse = 04942 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); 04943 04944 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { 04945 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, 04946 reg->map, &(reg->int_map)); 04947 if (r) return r; 04948 04949 reg->optimize = (allow_reverse != 0 04950 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); 04951 } 04952 else { 04953 reg->optimize = ONIG_OPTIMIZE_EXACT; 04954 } 04955 } 04956 04957 reg->dmin = e->mmd.min; 04958 reg->dmax = e->mmd.max; 04959 04960 if (reg->dmin != ONIG_INFINITE_DISTANCE) { 04961 reg->threshold_len = (int)(reg->dmin + (reg->exact_end - reg->exact)); 04962 } 04963 04964 return 0; 04965 } 04966 04967 static void 04968 set_optimize_map_info(regex_t* reg, OptMapInfo* m) 04969 { 04970 int i; 04971 04972 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) 04973 reg->map[i] = m->map[i]; 04974 04975 reg->optimize = ONIG_OPTIMIZE_MAP; 04976 reg->dmin = m->mmd.min; 04977 reg->dmax = m->mmd.max; 04978 04979 if (reg->dmin != ONIG_INFINITE_DISTANCE) { 04980 reg->threshold_len = (int)(reg->dmin + 1); 04981 } 04982 } 04983 04984 static void 04985 set_sub_anchor(regex_t* reg, OptAncInfo* anc) 04986 { 04987 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; 04988 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; 04989 } 04990 04991 #ifdef ONIG_DEBUG 04992 static void print_optimize_info(FILE* f, regex_t* reg); 04993 #endif 04994 04995 static int 04996 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) 04997 { 04998 04999 int r; 05000 NodeOptInfo opt; 05001 OptEnv env; 05002 05003 env.enc = reg->enc; 05004 env.options = reg->options; 05005 env.case_fold_flag = reg->case_fold_flag; 05006 env.scan_env = scan_env; 05007 clear_mml(&env.mmd); 05008 05009 r = optimize_node_left(node, &opt, &env); 05010 if (r) return r; 05011 05012 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | 05013 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | 05014 ANCHOR_LOOK_BEHIND); 05015 05016 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); 05017 05018 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { 05019 reg->anchor_dmin = opt.len.min; 05020 reg->anchor_dmax = opt.len.max; 05021 } 05022 05023 if (opt.exb.len > 0 || opt.exm.len > 0) { 05024 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); 05025 if (opt.map.value > 0 && 05026 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { 05027 goto set_map; 05028 } 05029 else { 05030 r = set_optimize_exact_info(reg, &opt.exb); 05031 set_sub_anchor(reg, &opt.exb.anc); 05032 } 05033 } 05034 else if (opt.map.value > 0) { 05035 set_map: 05036 set_optimize_map_info(reg, &opt.map); 05037 set_sub_anchor(reg, &opt.map.anc); 05038 } 05039 else { 05040 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; 05041 if (opt.len.max == 0) 05042 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; 05043 } 05044 05045 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) 05046 if (!onig_is_prelude()) print_optimize_info(stderr, reg); 05047 #endif 05048 return r; 05049 } 05050 05051 static void 05052 clear_optimize_info(regex_t* reg) 05053 { 05054 reg->optimize = ONIG_OPTIMIZE_NONE; 05055 reg->anchor = 0; 05056 reg->anchor_dmin = 0; 05057 reg->anchor_dmax = 0; 05058 reg->sub_anchor = 0; 05059 reg->exact_end = (UChar* )NULL; 05060 reg->threshold_len = 0; 05061 if (IS_NOT_NULL(reg->exact)) { 05062 xfree(reg->exact); 05063 reg->exact = (UChar* )NULL; 05064 } 05065 } 05066 05067 #ifdef ONIG_DEBUG 05068 05069 static void print_enc_string(FILE* fp, OnigEncoding enc, 05070 const UChar *s, const UChar *end) 05071 { 05072 fprintf(fp, "\nPATTERN: /"); 05073 05074 if (ONIGENC_MBC_MINLEN(enc) > 1) { 05075 const UChar *p; 05076 OnigCodePoint code; 05077 05078 p = s; 05079 while (p < end) { 05080 code = ONIGENC_MBC_TO_CODE(enc, p, end); 05081 if (code >= 0x80) { 05082 fprintf(fp, " 0x%04x ", (int )code); 05083 } 05084 else { 05085 fputc((int )code, fp); 05086 } 05087 05088 p += enclen(enc, p, end); 05089 } 05090 } 05091 else { 05092 while (s < end) { 05093 fputc((int )*s, fp); 05094 s++; 05095 } 05096 } 05097 05098 fprintf(fp, "/ (%s)\n", enc->name); 05099 } 05100 05101 static void 05102 print_distance_range(FILE* f, OnigDistance a, OnigDistance b) 05103 { 05104 if (a == ONIG_INFINITE_DISTANCE) 05105 fputs("inf", f); 05106 else 05107 fprintf(f, "(%"PRIuSIZE")", a); 05108 05109 fputs("-", f); 05110 05111 if (b == ONIG_INFINITE_DISTANCE) 05112 fputs("inf", f); 05113 else 05114 fprintf(f, "(%"PRIuSIZE")", b); 05115 } 05116 05117 static void 05118 print_anchor(FILE* f, int anchor) 05119 { 05120 int q = 0; 05121 05122 fprintf(f, "["); 05123 05124 if (anchor & ANCHOR_BEGIN_BUF) { 05125 fprintf(f, "begin-buf"); 05126 q = 1; 05127 } 05128 if (anchor & ANCHOR_BEGIN_LINE) { 05129 if (q) fprintf(f, ", "); 05130 q = 1; 05131 fprintf(f, "begin-line"); 05132 } 05133 if (anchor & ANCHOR_BEGIN_POSITION) { 05134 if (q) fprintf(f, ", "); 05135 q = 1; 05136 fprintf(f, "begin-pos"); 05137 } 05138 if (anchor & ANCHOR_END_BUF) { 05139 if (q) fprintf(f, ", "); 05140 q = 1; 05141 fprintf(f, "end-buf"); 05142 } 05143 if (anchor & ANCHOR_SEMI_END_BUF) { 05144 if (q) fprintf(f, ", "); 05145 q = 1; 05146 fprintf(f, "semi-end-buf"); 05147 } 05148 if (anchor & ANCHOR_END_LINE) { 05149 if (q) fprintf(f, ", "); 05150 q = 1; 05151 fprintf(f, "end-line"); 05152 } 05153 if (anchor & ANCHOR_ANYCHAR_STAR) { 05154 if (q) fprintf(f, ", "); 05155 q = 1; 05156 fprintf(f, "anychar-star"); 05157 } 05158 if (anchor & ANCHOR_ANYCHAR_STAR_ML) { 05159 if (q) fprintf(f, ", "); 05160 fprintf(f, "anychar-star-pl"); 05161 } 05162 05163 fprintf(f, "]"); 05164 } 05165 05166 static void 05167 print_optimize_info(FILE* f, regex_t* reg) 05168 { 05169 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", 05170 "EXACT_IC", "MAP" }; 05171 05172 fprintf(f, "optimize: %s\n", on[reg->optimize]); 05173 fprintf(f, " anchor: "); print_anchor(f, reg->anchor); 05174 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) 05175 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); 05176 fprintf(f, "\n"); 05177 05178 if (reg->optimize) { 05179 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); 05180 fprintf(f, "\n"); 05181 } 05182 fprintf(f, "\n"); 05183 05184 if (reg->exact) { 05185 UChar *p; 05186 fprintf(f, "exact: ["); 05187 for (p = reg->exact; p < reg->exact_end; p++) { 05188 fputc(*p, f); 05189 } 05190 fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact)); 05191 } 05192 else if (reg->optimize & ONIG_OPTIMIZE_MAP) { 05193 int c, i, n = 0; 05194 05195 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) 05196 if (reg->map[i]) n++; 05197 05198 fprintf(f, "map: n=%d\n", n); 05199 if (n > 0) { 05200 c = 0; 05201 fputc('[', f); 05202 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { 05203 if (reg->map[i] != 0) { 05204 if (c > 0) fputs(", ", f); 05205 c++; 05206 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 && 05207 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) 05208 fputc(i, f); 05209 else 05210 fprintf(f, "%d", i); 05211 } 05212 } 05213 fprintf(f, "]\n"); 05214 } 05215 } 05216 } 05217 #endif /* ONIG_DEBUG */ 05218 05219 05220 extern void 05221 onig_free_body(regex_t* reg) 05222 { 05223 if (IS_NOT_NULL(reg)) { 05224 if (IS_NOT_NULL(reg->p)) xfree(reg->p); 05225 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); 05226 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); 05227 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); 05228 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); 05229 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); 05230 05231 #ifdef USE_NAMED_GROUP 05232 onig_names_free(reg); 05233 #endif 05234 } 05235 } 05236 05237 extern void 05238 onig_free(regex_t* reg) 05239 { 05240 if (IS_NOT_NULL(reg)) { 05241 onig_free_body(reg); 05242 xfree(reg); 05243 } 05244 } 05245 05246 size_t 05247 onig_memsize(const regex_t *reg) 05248 { 05249 size_t size = sizeof(regex_t); 05250 if (IS_NOT_NULL(reg->p)) size += reg->alloc; 05251 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact; 05252 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; 05253 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; 05254 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange); 05255 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain); 05256 05257 return size; 05258 } 05259 05260 #define REGEX_TRANSFER(to,from) do {\ 05261 (to)->state = ONIG_STATE_MODIFY;\ 05262 onig_free_body(to);\ 05263 xmemcpy(to, from, sizeof(regex_t));\ 05264 xfree(from);\ 05265 } while (0) 05266 05267 extern void 05268 onig_transfer(regex_t* to, regex_t* from) 05269 { 05270 THREAD_ATOMIC_START; 05271 REGEX_TRANSFER(to, from); 05272 THREAD_ATOMIC_END; 05273 } 05274 05275 #define REGEX_CHAIN_HEAD(reg) do {\ 05276 while (IS_NOT_NULL((reg)->chain)) {\ 05277 (reg) = (reg)->chain;\ 05278 }\ 05279 } while (0) 05280 05281 extern void 05282 onig_chain_link_add(regex_t* to, regex_t* add) 05283 { 05284 THREAD_ATOMIC_START; 05285 REGEX_CHAIN_HEAD(to); 05286 to->chain = add; 05287 THREAD_ATOMIC_END; 05288 } 05289 05290 extern void 05291 onig_chain_reduce(regex_t* reg) 05292 { 05293 regex_t *head, *prev; 05294 05295 prev = reg; 05296 head = prev->chain; 05297 if (IS_NOT_NULL(head)) { 05298 reg->state = ONIG_STATE_MODIFY; 05299 while (IS_NOT_NULL(head->chain)) { 05300 prev = head; 05301 head = head->chain; 05302 } 05303 prev->chain = (regex_t* )NULL; 05304 REGEX_TRANSFER(reg, head); 05305 } 05306 } 05307 05308 #ifdef ONIG_DEBUG 05309 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); 05310 #endif 05311 #ifdef ONIG_DEBUG_PARSE_TREE 05312 static void print_tree P_((FILE* f, Node* node)); 05313 #endif 05314 05315 extern int 05316 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, 05317 OnigErrorInfo* einfo, const char *sourcefile, int sourceline) 05318 { 05319 #define COMPILE_INIT_SIZE 20 05320 05321 int r; 05322 OnigDistance init_size; 05323 Node* root; 05324 ScanEnv scan_env = {0}; 05325 #ifdef USE_SUBEXP_CALL 05326 UnsetAddrList uslist; 05327 #endif 05328 05329 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; 05330 05331 scan_env.sourcefile = sourcefile; 05332 scan_env.sourceline = sourceline; 05333 reg->state = ONIG_STATE_COMPILING; 05334 05335 #ifdef ONIG_DEBUG 05336 if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end); 05337 #endif 05338 05339 if (reg->alloc == 0) { 05340 init_size = (pattern_end - pattern) * 2; 05341 if (init_size <= 0) init_size = COMPILE_INIT_SIZE; 05342 r = BBUF_INIT(reg, init_size); 05343 if (r != 0) goto end; 05344 } 05345 else 05346 reg->used = 0; 05347 05348 reg->num_mem = 0; 05349 reg->num_repeat = 0; 05350 reg->num_null_check = 0; 05351 reg->repeat_range_alloc = 0; 05352 reg->repeat_range = (OnigRepeatRange* )NULL; 05353 #ifdef USE_COMBINATION_EXPLOSION_CHECK 05354 reg->num_comb_exp_check = 0; 05355 #endif 05356 05357 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); 05358 if (r != 0) goto err; 05359 05360 #ifdef ONIG_DEBUG_PARSE_TREE 05361 # if 0 05362 fprintf(stderr, "ORIGINAL PARSE TREE:\n"); 05363 if (!onig_is_prelude()) { 05364 print_tree(stderr, root); 05365 } 05366 # endif 05367 #endif 05368 05369 #ifdef USE_NAMED_GROUP 05370 /* mixed use named group and no-named group */ 05371 if (scan_env.num_named > 0 && 05372 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && 05373 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { 05374 if (scan_env.num_named != scan_env.num_mem) 05375 r = disable_noname_group_capture(&root, reg, &scan_env); 05376 else 05377 r = numbered_ref_check(root); 05378 05379 if (r != 0) goto err; 05380 } 05381 #endif 05382 05383 #ifdef USE_SUBEXP_CALL 05384 if (scan_env.num_call > 0) { 05385 r = unset_addr_list_init(&uslist, scan_env.num_call); 05386 if (r != 0) goto err; 05387 scan_env.unset_addr_list = &uslist; 05388 r = setup_subexp_call(root, &scan_env); 05389 if (r != 0) goto err_unset; 05390 r = subexp_recursive_check_trav(root, &scan_env); 05391 if (r < 0) goto err_unset; 05392 r = subexp_inf_recursive_check_trav(root, &scan_env); 05393 if (r != 0) goto err_unset; 05394 05395 reg->num_call = scan_env.num_call; 05396 } 05397 else 05398 reg->num_call = 0; 05399 #endif 05400 05401 r = setup_tree(root, reg, 0, &scan_env); 05402 if (r != 0) goto err_unset; 05403 05404 #ifdef ONIG_DEBUG_PARSE_TREE 05405 if (!onig_is_prelude()) print_tree(stderr, root); 05406 #endif 05407 05408 reg->capture_history = scan_env.capture_history; 05409 reg->bt_mem_start = scan_env.bt_mem_start; 05410 reg->bt_mem_start |= reg->capture_history; 05411 if (IS_FIND_CONDITION(reg->options)) 05412 BIT_STATUS_ON_ALL(reg->bt_mem_end); 05413 else { 05414 reg->bt_mem_end = scan_env.bt_mem_end; 05415 reg->bt_mem_end |= reg->capture_history; 05416 } 05417 05418 #ifdef USE_COMBINATION_EXPLOSION_CHECK 05419 if (scan_env.backrefed_mem == 0 05420 #ifdef USE_SUBEXP_CALL 05421 || scan_env.num_call == 0 05422 #endif 05423 ) { 05424 setup_comb_exp_check(root, 0, &scan_env); 05425 #ifdef USE_SUBEXP_CALL 05426 if (scan_env.has_recursion != 0) { 05427 scan_env.num_comb_exp_check = 0; 05428 } 05429 else 05430 #endif 05431 if (scan_env.comb_exp_max_regnum > 0) { 05432 int i; 05433 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { 05434 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { 05435 scan_env.num_comb_exp_check = 0; 05436 break; 05437 } 05438 } 05439 } 05440 } 05441 05442 reg->num_comb_exp_check = scan_env.num_comb_exp_check; 05443 #endif 05444 05445 clear_optimize_info(reg); 05446 #ifndef ONIG_DONT_OPTIMIZE 05447 r = set_optimize_info_from_tree(root, reg, &scan_env); 05448 if (r != 0) goto err_unset; 05449 #endif 05450 05451 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { 05452 xfree(scan_env.mem_nodes_dynamic); 05453 scan_env.mem_nodes_dynamic = (Node** )NULL; 05454 } 05455 05456 r = compile_tree(root, reg); 05457 if (r == 0) { 05458 r = add_opcode(reg, OP_END); 05459 #ifdef USE_SUBEXP_CALL 05460 if (scan_env.num_call > 0) { 05461 r = unset_addr_list_fix(&uslist, reg); 05462 unset_addr_list_end(&uslist); 05463 if (r) goto err; 05464 } 05465 #endif 05466 05467 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) 05468 reg->stack_pop_level = STACK_POP_LEVEL_ALL; 05469 else { 05470 if (reg->bt_mem_start != 0) 05471 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; 05472 else 05473 reg->stack_pop_level = STACK_POP_LEVEL_FREE; 05474 } 05475 } 05476 #ifdef USE_SUBEXP_CALL 05477 else if (scan_env.num_call > 0) { 05478 unset_addr_list_end(&uslist); 05479 } 05480 #endif 05481 onig_node_free(root); 05482 05483 #ifdef ONIG_DEBUG_COMPILE 05484 #ifdef USE_NAMED_GROUP 05485 if (!onig_is_prelude()) onig_print_names(stderr, reg); 05486 #endif 05487 if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg); 05488 #endif 05489 05490 end: 05491 reg->state = ONIG_STATE_NORMAL; 05492 return r; 05493 05494 err_unset: 05495 #ifdef USE_SUBEXP_CALL 05496 if (scan_env.num_call > 0) { 05497 unset_addr_list_end(&uslist); 05498 } 05499 #endif 05500 err: 05501 if (IS_NOT_NULL(scan_env.error)) { 05502 if (IS_NOT_NULL(einfo)) { 05503 einfo->enc = scan_env.enc; 05504 einfo->par = scan_env.error; 05505 einfo->par_end = scan_env.error_end; 05506 } 05507 } 05508 05509 onig_node_free(root); 05510 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) 05511 xfree(scan_env.mem_nodes_dynamic); 05512 return r; 05513 } 05514 05515 #ifdef USE_RECOMPILE_API 05516 extern int 05517 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, 05518 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, 05519 OnigErrorInfo* einfo) 05520 { 05521 int r; 05522 regex_t *new_reg; 05523 05524 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); 05525 if (r) return r; 05526 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 05527 onig_transfer(reg, new_reg); 05528 } 05529 else { 05530 onig_chain_link_add(reg, new_reg); 05531 } 05532 return 0; 05533 } 05534 #endif 05535 05536 static int onig_inited = 0; 05537 05538 extern int 05539 onig_reg_init(regex_t* reg, OnigOptionType option, 05540 OnigCaseFoldType case_fold_flag, 05541 OnigEncoding enc, const OnigSyntaxType* syntax) 05542 { 05543 if (! onig_inited) 05544 onig_init(); 05545 05546 if (IS_NULL(reg)) 05547 return ONIGERR_INVALID_ARGUMENT; 05548 05549 if (ONIGENC_IS_UNDEF(enc)) 05550 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; 05551 05552 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) 05553 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { 05554 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; 05555 } 05556 05557 (reg)->state = ONIG_STATE_MODIFY; 05558 05559 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { 05560 option |= syntax->options; 05561 option &= ~ONIG_OPTION_SINGLELINE; 05562 } 05563 else 05564 option |= syntax->options; 05565 05566 (reg)->enc = enc; 05567 (reg)->options = option; 05568 (reg)->syntax = syntax; 05569 (reg)->optimize = 0; 05570 (reg)->exact = (UChar* )NULL; 05571 (reg)->int_map = (int* )NULL; 05572 (reg)->int_map_backward = (int* )NULL; 05573 (reg)->chain = (regex_t* )NULL; 05574 05575 (reg)->p = (UChar* )NULL; 05576 (reg)->alloc = 0; 05577 (reg)->used = 0; 05578 (reg)->name_table = (void* )NULL; 05579 05580 (reg)->case_fold_flag = case_fold_flag; 05581 return 0; 05582 } 05583 05584 extern int 05585 onig_new_without_alloc(regex_t* reg, const UChar* pattern, 05586 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, 05587 OnigSyntaxType* syntax, OnigErrorInfo* einfo) 05588 { 05589 int r; 05590 05591 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); 05592 if (r) return r; 05593 05594 r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0); 05595 return r; 05596 } 05597 05598 extern int 05599 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, 05600 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, 05601 OnigErrorInfo* einfo) 05602 { 05603 int r; 05604 05605 *reg = (regex_t* )xmalloc(sizeof(regex_t)); 05606 if (IS_NULL(*reg)) return ONIGERR_MEMORY; 05607 05608 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); 05609 if (r) goto err; 05610 05611 r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0); 05612 if (r) { 05613 err: 05614 onig_free(*reg); 05615 *reg = NULL; 05616 } 05617 return r; 05618 } 05619 05620 05621 extern int 05622 onig_init(void) 05623 { 05624 if (onig_inited != 0) 05625 return 0; 05626 05627 THREAD_SYSTEM_INIT; 05628 THREAD_ATOMIC_START; 05629 05630 onig_inited = 1; 05631 05632 onigenc_init(); 05633 /* onigenc_set_default_caseconv_table((UChar* )0); */ 05634 05635 #ifdef ONIG_DEBUG_STATISTICS 05636 onig_statistics_init(); 05637 #endif 05638 05639 THREAD_ATOMIC_END; 05640 return 0; 05641 } 05642 05643 05644 extern int 05645 onig_end(void) 05646 { 05647 THREAD_ATOMIC_START; 05648 05649 #ifdef ONIG_DEBUG_STATISTICS 05650 if (!onig_is_prelude()) onig_print_statistics(stderr); 05651 #endif 05652 05653 #ifdef USE_SHARED_CCLASS_TABLE 05654 onig_free_shared_cclass_table(); 05655 #endif 05656 05657 #ifdef USE_PARSE_TREE_NODE_RECYCLE 05658 onig_free_node_list(); 05659 #endif 05660 05661 onig_inited = 0; 05662 05663 THREAD_ATOMIC_END; 05664 THREAD_SYSTEM_END; 05665 return 0; 05666 } 05667 05668 extern int 05669 onig_is_in_code_range(const UChar* p, OnigCodePoint code) 05670 { 05671 OnigCodePoint n, *data; 05672 OnigCodePoint low, high, x; 05673 05674 GET_CODE_POINT(n, p); 05675 data = (OnigCodePoint* )p; 05676 data++; 05677 05678 for (low = 0, high = n; low < high; ) { 05679 x = (low + high) >> 1; 05680 if (code > data[x * 2 + 1]) 05681 low = x + 1; 05682 else 05683 high = x; 05684 } 05685 05686 return ((low < n && code >= data[low * 2]) ? 1 : 0); 05687 } 05688 05689 extern int 05690 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) 05691 { 05692 int found; 05693 05694 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { 05695 if (IS_NULL(cc->mbuf)) { 05696 found = 0; 05697 } 05698 else { 05699 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); 05700 } 05701 } 05702 else { 05703 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); 05704 } 05705 05706 if (IS_NCCLASS_NOT(cc)) 05707 return !found; 05708 else 05709 return found; 05710 } 05711 05712 extern int 05713 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) 05714 { 05715 int len; 05716 05717 if (ONIGENC_MBC_MINLEN(enc) > 1) { 05718 len = 2; 05719 } 05720 else { 05721 len = ONIGENC_CODE_TO_MBCLEN(enc, code); 05722 } 05723 return onig_is_code_in_cc_len(len, code, cc); 05724 } 05725 05726 05727 #ifdef ONIG_DEBUG 05728 05729 /* arguments type */ 05730 #define ARG_SPECIAL -1 05731 #define ARG_NON 0 05732 #define ARG_RELADDR 1 05733 #define ARG_ABSADDR 2 05734 #define ARG_LENGTH 3 05735 #define ARG_MEMNUM 4 05736 #define ARG_OPTION 5 05737 #define ARG_STATE_CHECK 6 05738 05739 OnigOpInfoType OnigOpInfo[] = { 05740 { OP_FINISH, "finish", ARG_NON }, 05741 { OP_END, "end", ARG_NON }, 05742 { OP_EXACT1, "exact1", ARG_SPECIAL }, 05743 { OP_EXACT2, "exact2", ARG_SPECIAL }, 05744 { OP_EXACT3, "exact3", ARG_SPECIAL }, 05745 { OP_EXACT4, "exact4", ARG_SPECIAL }, 05746 { OP_EXACT5, "exact5", ARG_SPECIAL }, 05747 { OP_EXACTN, "exactn", ARG_SPECIAL }, 05748 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, 05749 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, 05750 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, 05751 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, 05752 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, 05753 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, 05754 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, 05755 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, 05756 { OP_CCLASS, "cclass", ARG_SPECIAL }, 05757 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, 05758 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, 05759 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, 05760 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, 05761 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, 05762 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, 05763 { OP_ANYCHAR, "anychar", ARG_NON }, 05764 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, 05765 { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, 05766 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, 05767 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, 05768 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, 05769 { OP_WORD, "word", ARG_NON }, 05770 { OP_NOT_WORD, "not-word", ARG_NON }, 05771 { OP_WORD_BOUND, "word-bound", ARG_NON }, 05772 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, 05773 { OP_WORD_BEGIN, "word-begin", ARG_NON }, 05774 { OP_WORD_END, "word-end", ARG_NON }, 05775 { OP_BEGIN_BUF, "begin-buf", ARG_NON }, 05776 { OP_END_BUF, "end-buf", ARG_NON }, 05777 { OP_BEGIN_LINE, "begin-line", ARG_NON }, 05778 { OP_END_LINE, "end-line", ARG_NON }, 05779 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, 05780 { OP_BEGIN_POSITION, "begin-position", ARG_NON }, 05781 { OP_BACKREF1, "backref1", ARG_NON }, 05782 { OP_BACKREF2, "backref2", ARG_NON }, 05783 { OP_BACKREFN, "backrefn", ARG_MEMNUM }, 05784 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, 05785 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, 05786 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, 05787 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, 05788 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, 05789 { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, 05790 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, 05791 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, 05792 { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, 05793 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, 05794 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, 05795 { OP_SET_OPTION, "set-option", ARG_OPTION }, 05796 { OP_FAIL, "fail", ARG_NON }, 05797 { OP_JUMP, "jump", ARG_RELADDR }, 05798 { OP_PUSH, "push", ARG_RELADDR }, 05799 { OP_POP, "pop", ARG_NON }, 05800 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, 05801 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, 05802 { OP_REPEAT, "repeat", ARG_SPECIAL }, 05803 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, 05804 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, 05805 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, 05806 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, 05807 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, 05808 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, 05809 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, 05810 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, 05811 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, 05812 { OP_PUSH_POS, "push-pos", ARG_NON }, 05813 { OP_POP_POS, "pop-pos", ARG_NON }, 05814 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, 05815 { OP_FAIL_POS, "fail-pos", ARG_NON }, 05816 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, 05817 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, 05818 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, 05819 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, 05820 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, 05821 { OP_CALL, "call", ARG_ABSADDR }, 05822 { OP_RETURN, "return", ARG_NON }, 05823 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, 05824 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, 05825 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, 05826 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, 05827 { OP_STATE_CHECK_ANYCHAR_ML_STAR, 05828 "state-check-anychar-ml*", ARG_STATE_CHECK }, 05829 { -1, "", ARG_NON } 05830 }; 05831 05832 static const char* 05833 op2name(int opcode) 05834 { 05835 int i; 05836 05837 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 05838 if (opcode == OnigOpInfo[i].opcode) 05839 return OnigOpInfo[i].name; 05840 } 05841 return ""; 05842 } 05843 05844 static int 05845 op2arg_type(int opcode) 05846 { 05847 int i; 05848 05849 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 05850 if (opcode == OnigOpInfo[i].opcode) 05851 return OnigOpInfo[i].arg_type; 05852 } 05853 return ARG_SPECIAL; 05854 } 05855 05856 static void 05857 Indent(FILE* f, int indent) 05858 { 05859 int i; 05860 for (i = 0; i < indent; i++) putc(' ', f); 05861 } 05862 05863 static void 05864 p_string(FILE* f, int len, UChar* s) 05865 { 05866 fputs(":", f); 05867 while (len-- > 0) { fputc(*s++, f); } 05868 } 05869 05870 static void 05871 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) 05872 { 05873 int x = len * mb_len; 05874 05875 fprintf(f, ":%d:", len); 05876 while (x-- > 0) { fputc(*s++, f); } 05877 } 05878 05879 extern void 05880 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, 05881 OnigEncoding enc) 05882 { 05883 int i, n, arg_type; 05884 RelAddrType addr; 05885 LengthType len; 05886 MemNumType mem; 05887 StateCheckNumType scn; 05888 OnigCodePoint code; 05889 UChar *q; 05890 05891 fprintf(f, "[%s", op2name(*bp)); 05892 arg_type = op2arg_type(*bp); 05893 if (arg_type != ARG_SPECIAL) { 05894 bp++; 05895 switch (arg_type) { 05896 case ARG_NON: 05897 break; 05898 case ARG_RELADDR: 05899 GET_RELADDR_INC(addr, bp); 05900 fprintf(f, ":(%d)", addr); 05901 break; 05902 case ARG_ABSADDR: 05903 GET_ABSADDR_INC(addr, bp); 05904 fprintf(f, ":(%d)", addr); 05905 break; 05906 case ARG_LENGTH: 05907 GET_LENGTH_INC(len, bp); 05908 fprintf(f, ":%d", len); 05909 break; 05910 case ARG_MEMNUM: 05911 mem = *((MemNumType* )bp); 05912 bp += SIZE_MEMNUM; 05913 fprintf(f, ":%d", mem); 05914 break; 05915 case ARG_OPTION: 05916 { 05917 OnigOptionType option = *((OnigOptionType* )bp); 05918 bp += SIZE_OPTION; 05919 fprintf(f, ":%d", option); 05920 } 05921 break; 05922 05923 case ARG_STATE_CHECK: 05924 scn = *((StateCheckNumType* )bp); 05925 bp += SIZE_STATE_CHECK_NUM; 05926 fprintf(f, ":%d", scn); 05927 break; 05928 } 05929 } 05930 else { 05931 switch (*bp++) { 05932 case OP_EXACT1: 05933 case OP_ANYCHAR_STAR_PEEK_NEXT: 05934 case OP_ANYCHAR_ML_STAR_PEEK_NEXT: 05935 p_string(f, 1, bp++); break; 05936 case OP_EXACT2: 05937 p_string(f, 2, bp); bp += 2; break; 05938 case OP_EXACT3: 05939 p_string(f, 3, bp); bp += 3; break; 05940 case OP_EXACT4: 05941 p_string(f, 4, bp); bp += 4; break; 05942 case OP_EXACT5: 05943 p_string(f, 5, bp); bp += 5; break; 05944 case OP_EXACTN: 05945 GET_LENGTH_INC(len, bp); 05946 p_len_string(f, len, 1, bp); 05947 bp += len; 05948 break; 05949 05950 case OP_EXACTMB2N1: 05951 p_string(f, 2, bp); bp += 2; break; 05952 case OP_EXACTMB2N2: 05953 p_string(f, 4, bp); bp += 4; break; 05954 case OP_EXACTMB2N3: 05955 p_string(f, 6, bp); bp += 6; break; 05956 case OP_EXACTMB2N: 05957 GET_LENGTH_INC(len, bp); 05958 p_len_string(f, len, 2, bp); 05959 bp += len * 2; 05960 break; 05961 case OP_EXACTMB3N: 05962 GET_LENGTH_INC(len, bp); 05963 p_len_string(f, len, 3, bp); 05964 bp += len * 3; 05965 break; 05966 case OP_EXACTMBN: 05967 { 05968 int mb_len; 05969 05970 GET_LENGTH_INC(mb_len, bp); 05971 GET_LENGTH_INC(len, bp); 05972 fprintf(f, ":%d:%d:", mb_len, len); 05973 n = len * mb_len; 05974 while (n-- > 0) { fputc(*bp++, f); } 05975 } 05976 break; 05977 05978 case OP_EXACT1_IC: 05979 len = enclen(enc, bp, bpend); 05980 p_string(f, len, bp); 05981 bp += len; 05982 break; 05983 case OP_EXACTN_IC: 05984 GET_LENGTH_INC(len, bp); 05985 p_len_string(f, len, 1, bp); 05986 bp += len; 05987 break; 05988 05989 case OP_CCLASS: 05990 n = bitset_on_num((BitSetRef )bp); 05991 bp += SIZE_BITSET; 05992 fprintf(f, ":%d", n); 05993 break; 05994 05995 case OP_CCLASS_NOT: 05996 n = bitset_on_num((BitSetRef )bp); 05997 bp += SIZE_BITSET; 05998 fprintf(f, ":%d", n); 05999 break; 06000 06001 case OP_CCLASS_MB: 06002 case OP_CCLASS_MB_NOT: 06003 GET_LENGTH_INC(len, bp); 06004 q = bp; 06005 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 06006 ALIGNMENT_RIGHT(q); 06007 #endif 06008 GET_CODE_POINT(code, q); 06009 bp += len; 06010 fprintf(f, ":%d:%d", (int )code, len); 06011 break; 06012 06013 case OP_CCLASS_MIX: 06014 case OP_CCLASS_MIX_NOT: 06015 n = bitset_on_num((BitSetRef )bp); 06016 bp += SIZE_BITSET; 06017 GET_LENGTH_INC(len, bp); 06018 q = bp; 06019 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 06020 ALIGNMENT_RIGHT(q); 06021 #endif 06022 GET_CODE_POINT(code, q); 06023 bp += len; 06024 fprintf(f, ":%d:%d:%d", n, (int )code, len); 06025 break; 06026 06027 case OP_CCLASS_NODE: 06028 { 06029 CClassNode *cc; 06030 06031 GET_POINTER_INC(cc, bp); 06032 n = bitset_on_num(cc->bs); 06033 fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n); 06034 } 06035 break; 06036 06037 case OP_BACKREFN_IC: 06038 mem = *((MemNumType* )bp); 06039 bp += SIZE_MEMNUM; 06040 fprintf(f, ":%d", mem); 06041 break; 06042 06043 case OP_BACKREF_MULTI_IC: 06044 case OP_BACKREF_MULTI: 06045 fputs(" ", f); 06046 GET_LENGTH_INC(len, bp); 06047 for (i = 0; i < len; i++) { 06048 GET_MEMNUM_INC(mem, bp); 06049 if (i > 0) fputs(", ", f); 06050 fprintf(f, "%d", mem); 06051 } 06052 break; 06053 06054 case OP_BACKREF_WITH_LEVEL: 06055 { 06056 OnigOptionType option; 06057 LengthType level; 06058 06059 GET_OPTION_INC(option, bp); 06060 fprintf(f, ":%d", option); 06061 GET_LENGTH_INC(level, bp); 06062 fprintf(f, ":%d", level); 06063 06064 fputs(" ", f); 06065 GET_LENGTH_INC(len, bp); 06066 for (i = 0; i < len; i++) { 06067 GET_MEMNUM_INC(mem, bp); 06068 if (i > 0) fputs(", ", f); 06069 fprintf(f, "%d", mem); 06070 } 06071 } 06072 break; 06073 06074 case OP_REPEAT: 06075 case OP_REPEAT_NG: 06076 { 06077 mem = *((MemNumType* )bp); 06078 bp += SIZE_MEMNUM; 06079 addr = *((RelAddrType* )bp); 06080 bp += SIZE_RELADDR; 06081 fprintf(f, ":%d:%d", mem, addr); 06082 } 06083 break; 06084 06085 case OP_PUSH_OR_JUMP_EXACT1: 06086 case OP_PUSH_IF_PEEK_NEXT: 06087 addr = *((RelAddrType* )bp); 06088 bp += SIZE_RELADDR; 06089 fprintf(f, ":(%d)", addr); 06090 p_string(f, 1, bp); 06091 bp += 1; 06092 break; 06093 06094 case OP_LOOK_BEHIND: 06095 GET_LENGTH_INC(len, bp); 06096 fprintf(f, ":%d", len); 06097 break; 06098 06099 case OP_PUSH_LOOK_BEHIND_NOT: 06100 GET_RELADDR_INC(addr, bp); 06101 GET_LENGTH_INC(len, bp); 06102 fprintf(f, ":%d:(%d)", len, addr); 06103 break; 06104 06105 case OP_STATE_CHECK_PUSH: 06106 case OP_STATE_CHECK_PUSH_OR_JUMP: 06107 scn = *((StateCheckNumType* )bp); 06108 bp += SIZE_STATE_CHECK_NUM; 06109 addr = *((RelAddrType* )bp); 06110 bp += SIZE_RELADDR; 06111 fprintf(f, ":%d:(%d)", scn, addr); 06112 break; 06113 06114 default: 06115 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", 06116 *--bp); 06117 } 06118 } 06119 fputs("]", f); 06120 if (nextp) *nextp = bp; 06121 } 06122 06123 static void 06124 print_compiled_byte_code_list(FILE* f, regex_t* reg) 06125 { 06126 int ncode; 06127 UChar* bp = reg->p; 06128 UChar* end = reg->p + reg->used; 06129 06130 fprintf(f, "code length: %d\n", reg->used); 06131 06132 ncode = -1; 06133 while (bp < end) { 06134 ncode++; 06135 if (bp > reg->p) { 06136 if (ncode % 5 == 0) 06137 fprintf(f, "\n%ld:", bp-reg->p); 06138 else 06139 fprintf(f, " %ld:", bp-reg->p); 06140 } 06141 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc); 06142 } 06143 06144 fprintf(f, "\n"); 06145 } 06146 06147 static void 06148 print_indent_tree(FILE* f, Node* node, int indent) 06149 { 06150 int i, type, container_p = 0; 06151 int add = 3; 06152 UChar* p; 06153 06154 Indent(f, indent); 06155 if (IS_NULL(node)) { 06156 fprintf(f, "ERROR: null node!!!\n"); 06157 exit (0); 06158 } 06159 06160 type = NTYPE(node); 06161 switch (type) { 06162 case NT_LIST: 06163 case NT_ALT: 06164 if (NTYPE(node) == NT_LIST) 06165 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node); 06166 else 06167 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node); 06168 06169 print_indent_tree(f, NCAR(node), indent + add); 06170 while (IS_NOT_NULL(node = NCDR(node))) { 06171 if (NTYPE(node) != type) { 06172 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); 06173 exit(0); 06174 } 06175 print_indent_tree(f, NCAR(node), indent + add); 06176 } 06177 break; 06178 06179 case NT_STR: 06180 fprintf(f, "<string%s:%"PRIxPTR">", 06181 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node); 06182 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { 06183 if (*p >= 0x20 && *p < 0x7f) 06184 fputc(*p, f); 06185 else { 06186 fprintf(f, " 0x%02x", *p); 06187 } 06188 } 06189 break; 06190 06191 case NT_CCLASS: 06192 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node); 06193 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); 06194 if (NCCLASS(node)->mbuf) { 06195 BBuf* bbuf = NCCLASS(node)->mbuf; 06196 for (i = 0; i < (int)bbuf->used; i++) { 06197 if (i > 0) fprintf(f, ","); 06198 fprintf(f, "%0x", bbuf->p[i]); 06199 } 06200 } 06201 break; 06202 06203 case NT_CTYPE: 06204 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node); 06205 switch (NCTYPE(node)->ctype) { 06206 case ONIGENC_CTYPE_WORD: 06207 if (NCTYPE(node)->not != 0) 06208 fputs("not word", f); 06209 else 06210 fputs("word", f); 06211 break; 06212 06213 default: 06214 fprintf(f, "ERROR: undefined ctype.\n"); 06215 exit(0); 06216 } 06217 break; 06218 06219 case NT_CANY: 06220 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node); 06221 break; 06222 06223 case NT_ANCHOR: 06224 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node); 06225 switch (NANCHOR(node)->type) { 06226 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; 06227 case ANCHOR_END_BUF: fputs("end buf", f); break; 06228 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; 06229 case ANCHOR_END_LINE: fputs("end line", f); break; 06230 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; 06231 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; 06232 06233 case ANCHOR_WORD_BOUND: fputs("word bound", f); break; 06234 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; 06235 #ifdef USE_WORD_BEGIN_END 06236 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; 06237 case ANCHOR_WORD_END: fputs("word end", f); break; 06238 #endif 06239 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break; 06240 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break; 06241 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break; 06242 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break; 06243 06244 default: 06245 fprintf(f, "ERROR: undefined anchor type.\n"); 06246 break; 06247 } 06248 break; 06249 06250 case NT_BREF: 06251 { 06252 int* p; 06253 BRefNode* br = NBREF(node); 06254 p = BACKREFS_P(br); 06255 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node); 06256 for (i = 0; i < br->back_num; i++) { 06257 if (i > 0) fputs(", ", f); 06258 fprintf(f, "%d", p[i]); 06259 } 06260 } 06261 break; 06262 06263 #ifdef USE_SUBEXP_CALL 06264 case NT_CALL: 06265 { 06266 CallNode* cn = NCALL(node); 06267 fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node); 06268 p_string(f, cn->name_end - cn->name, cn->name); 06269 } 06270 break; 06271 #endif 06272 06273 case NT_QTFR: 06274 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node, 06275 NQTFR(node)->lower, NQTFR(node)->upper, 06276 (NQTFR(node)->greedy ? "" : "?")); 06277 print_indent_tree(f, NQTFR(node)->target, indent + add); 06278 break; 06279 06280 case NT_ENCLOSE: 06281 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node); 06282 switch (NENCLOSE(node)->type) { 06283 case ENCLOSE_OPTION: 06284 fprintf(f, "option:%d\n", NENCLOSE(node)->option); 06285 print_indent_tree(f, NENCLOSE(node)->target, indent + add); 06286 break; 06287 case ENCLOSE_MEMORY: 06288 fprintf(f, "memory:%d", NENCLOSE(node)->regnum); 06289 break; 06290 case ENCLOSE_STOP_BACKTRACK: 06291 fprintf(f, "stop-bt"); 06292 break; 06293 06294 default: 06295 break; 06296 } 06297 fprintf(f, "\n"); 06298 print_indent_tree(f, NENCLOSE(node)->target, indent + add); 06299 break; 06300 06301 default: 06302 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); 06303 break; 06304 } 06305 06306 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && 06307 type != NT_ENCLOSE) 06308 fprintf(f, "\n"); 06309 06310 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add); 06311 06312 fflush(f); 06313 } 06314 #endif /* ONIG_DEBUG */ 06315 06316 #ifdef ONIG_DEBUG_PARSE_TREE 06317 static void 06318 print_tree(FILE* f, Node* node) 06319 { 06320 print_indent_tree(f, node, 0); 06321 } 06322 #endif 06323
1.7.6.1