Ruby  1.9.3p448(2013-06-27revision41675)
regcomp.c
Go to the documentation of this file.
00001 /**********************************************************************
00002   regcomp.c -  Oniguruma (regular expression library)
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * All rights reserved.
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions
00010  * are met:
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in the
00015  *    documentation and/or other materials provided with the distribution.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00018  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00021  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00022  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00023  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00024  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00025  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00026  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00027  * SUCH DAMAGE.
00028  */
00029 
00030 #include "regparse.h"
00031 
00032 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
00033 
00034 extern OnigCaseFoldType
00035 onig_get_default_case_fold_flag(void)
00036 {
00037   return OnigDefaultCaseFoldFlag;
00038 }
00039 
00040 extern int
00041 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
00042 {
00043   OnigDefaultCaseFoldFlag = case_fold_flag;
00044   return 0;
00045 }
00046 
00047 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
00048 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
00049 #endif
00050 
00051 static UChar*
00052 str_dup(UChar* s, UChar* end)
00053 {
00054   ptrdiff_t len = end - s;
00055 
00056   if (len > 0) {
00057     UChar* r = (UChar* )xmalloc(len + 1);
00058     CHECK_NULL_RETURN(r);
00059     xmemcpy(r, s, len);
00060     r[len] = (UChar )0;
00061     return r;
00062   }
00063   else return NULL;
00064 }
00065 
00066 static void
00067 swap_node(Node* a, Node* b)
00068 {
00069   Node c;
00070   c = *a; *a = *b; *b = c;
00071 
00072   if (NTYPE(a) == NT_STR) {
00073     StrNode* sn = NSTR(a);
00074     if (sn->capa == 0) {
00075       size_t len = sn->end - sn->s;
00076       sn->s   = sn->buf;
00077       sn->end = sn->s + len;
00078     }
00079   }
00080 
00081   if (NTYPE(b) == NT_STR) {
00082     StrNode* sn = NSTR(b);
00083     if (sn->capa == 0) {
00084       size_t len = sn->end - sn->s;
00085       sn->s   = sn->buf;
00086       sn->end = sn->s + len;
00087     }
00088   }
00089 }
00090 
00091 static OnigDistance
00092 distance_add(OnigDistance d1, OnigDistance d2)
00093 {
00094   if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
00095     return ONIG_INFINITE_DISTANCE;
00096   else {
00097     if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
00098     else return ONIG_INFINITE_DISTANCE;
00099   }
00100 }
00101 
00102 static OnigDistance
00103 distance_multiply(OnigDistance d, int m)
00104 {
00105   if (m == 0) return 0;
00106 
00107   if (d < ONIG_INFINITE_DISTANCE / m)
00108     return d * m;
00109   else
00110     return ONIG_INFINITE_DISTANCE;
00111 }
00112 
00113 static int
00114 bitset_is_empty(BitSetRef bs)
00115 {
00116   int i;
00117   for (i = 0; i < (int )BITSET_SIZE; i++) {
00118     if (bs[i] != 0) return 0;
00119   }
00120   return 1;
00121 }
00122 
00123 #ifdef ONIG_DEBUG
00124 static int
00125 onig_is_prelude(void)
00126 {
00127     return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
00128 }
00129 
00130 static int
00131 bitset_on_num(BitSetRef bs)
00132 {
00133   int i, n;
00134 
00135   n = 0;
00136   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
00137     if (BITSET_AT(bs, i)) n++;
00138   }
00139   return n;
00140 }
00141 #endif
00142 
00143 extern int
00144 onig_bbuf_init(BBuf* buf, OnigDistance size)
00145 {
00146   if (size <= 0) {
00147     size   = 0;
00148     buf->p = NULL;
00149   }
00150   else {
00151     buf->p = (UChar* )xmalloc(size);
00152     if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
00153   }
00154 
00155   buf->alloc = (unsigned int)size;
00156   buf->used  = 0;
00157   return 0;
00158 }
00159 
00160 
00161 #ifdef USE_SUBEXP_CALL
00162 
00163 static int
00164 unset_addr_list_init(UnsetAddrList* uslist, int size)
00165 {
00166   UnsetAddr* p;
00167 
00168   p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
00169   CHECK_NULL_RETURN_MEMERR(p);
00170   uslist->num   = 0;
00171   uslist->alloc = size;
00172   uslist->us    = p;
00173   return 0;
00174 }
00175 
00176 static void
00177 unset_addr_list_end(UnsetAddrList* uslist)
00178 {
00179   if (IS_NOT_NULL(uslist->us))
00180     xfree(uslist->us);
00181 }
00182 
00183 static int
00184 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
00185 {
00186   UnsetAddr* p;
00187   int size;
00188 
00189   if (uslist->num >= uslist->alloc) {
00190     size = uslist->alloc * 2;
00191     p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
00192     CHECK_NULL_RETURN_MEMERR(p);
00193     uslist->alloc = size;
00194     uslist->us    = p;
00195   }
00196 
00197   uslist->us[uslist->num].offset = offset;
00198   uslist->us[uslist->num].target = node;
00199   uslist->num++;
00200   return 0;
00201 }
00202 #endif /* USE_SUBEXP_CALL */
00203 
00204 
00205 static int
00206 add_opcode(regex_t* reg, int opcode)
00207 {
00208   BBUF_ADD1(reg, opcode);
00209   return 0;
00210 }
00211 
00212 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00213 static int
00214 add_state_check_num(regex_t* reg, int num)
00215 {
00216   StateCheckNumType n = (StateCheckNumType )num;
00217 
00218   BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
00219   return 0;
00220 }
00221 #endif
00222 
00223 static int
00224 add_rel_addr(regex_t* reg, int addr)
00225 {
00226   RelAddrType ra = (RelAddrType )addr;
00227 
00228   BBUF_ADD(reg, &ra, SIZE_RELADDR);
00229   return 0;
00230 }
00231 
00232 static int
00233 add_abs_addr(regex_t* reg, int addr)
00234 {
00235   AbsAddrType ra = (AbsAddrType )addr;
00236 
00237   BBUF_ADD(reg, &ra, SIZE_ABSADDR);
00238   return 0;
00239 }
00240 
00241 static int
00242 add_length(regex_t* reg, OnigDistance len)
00243 {
00244   LengthType l = (LengthType )len;
00245 
00246   BBUF_ADD(reg, &l, SIZE_LENGTH);
00247   return 0;
00248 }
00249 
00250 static int
00251 add_mem_num(regex_t* reg, int num)
00252 {
00253   MemNumType n = (MemNumType )num;
00254 
00255   BBUF_ADD(reg, &n, SIZE_MEMNUM);
00256   return 0;
00257 }
00258 
00259 static int
00260 add_pointer(regex_t* reg, void* addr)
00261 {
00262   PointerType ptr = (PointerType )addr;
00263 
00264   BBUF_ADD(reg, &ptr, SIZE_POINTER);
00265   return 0;
00266 }
00267 
00268 static int
00269 add_option(regex_t* reg, OnigOptionType option)
00270 {
00271   BBUF_ADD(reg, &option, SIZE_OPTION);
00272   return 0;
00273 }
00274 
00275 static int
00276 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
00277 {
00278   int r;
00279 
00280   r = add_opcode(reg, opcode);
00281   if (r) return r;
00282   r = add_rel_addr(reg, addr);
00283   return r;
00284 }
00285 
00286 static int
00287 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
00288 {
00289   BBUF_ADD(reg, bytes, len);
00290   return 0;
00291 }
00292 
00293 static int
00294 add_bitset(regex_t* reg, BitSetRef bs)
00295 {
00296   BBUF_ADD(reg, bs, SIZE_BITSET);
00297   return 0;
00298 }
00299 
00300 static int
00301 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
00302 {
00303   int r;
00304 
00305   r = add_opcode(reg, opcode);
00306   if (r) return r;
00307   r = add_option(reg, option);
00308   return r;
00309 }
00310 
00311 static int compile_length_tree(Node* node, regex_t* reg);
00312 static int compile_tree(Node* node, regex_t* reg);
00313 
00314 
00315 #define IS_NEED_STR_LEN_OP_EXACT(op) \
00316    ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
00317     (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
00318 
00319 static int
00320 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
00321 {
00322   int op;
00323 
00324   if (ignore_case) {
00325     switch (str_len) {
00326     case 1:  op = OP_EXACT1_IC; break;
00327     default: op = OP_EXACTN_IC; break;
00328     }
00329   }
00330   else {
00331     switch (mb_len) {
00332     case 1:
00333       switch (str_len) {
00334       case 1:  op = OP_EXACT1; break;
00335       case 2:  op = OP_EXACT2; break;
00336       case 3:  op = OP_EXACT3; break;
00337       case 4:  op = OP_EXACT4; break;
00338       case 5:  op = OP_EXACT5; break;
00339       default: op = OP_EXACTN; break;
00340       }
00341       break;
00342 
00343     case 2:
00344       switch (str_len) {
00345       case 1:  op = OP_EXACTMB2N1; break;
00346       case 2:  op = OP_EXACTMB2N2; break;
00347       case 3:  op = OP_EXACTMB2N3; break;
00348       default: op = OP_EXACTMB2N;  break;
00349       }
00350       break;
00351 
00352     case 3:
00353       op = OP_EXACTMB3N;
00354       break;
00355 
00356     default:
00357       op = OP_EXACTMBN;
00358       break;
00359     }
00360   }
00361   return op;
00362 }
00363 
00364 static int
00365 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
00366 {
00367   int r;
00368   int saved_num_null_check = reg->num_null_check;
00369 
00370   if (empty_info != 0) {
00371     r = add_opcode(reg, OP_NULL_CHECK_START);
00372     if (r) return r;
00373     r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
00374     if (r) return r;
00375     reg->num_null_check++;
00376   }
00377 
00378   r = compile_tree(node, reg);
00379   if (r) return r;
00380 
00381   if (empty_info != 0) {
00382     if (empty_info == NQ_TARGET_IS_EMPTY)
00383       r = add_opcode(reg, OP_NULL_CHECK_END);
00384     else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
00385       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
00386     else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
00387       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
00388 
00389     if (r) return r;
00390     r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
00391   }
00392   return r;
00393 }
00394 
00395 #ifdef USE_SUBEXP_CALL
00396 static int
00397 compile_call(CallNode* node, regex_t* reg)
00398 {
00399   int r;
00400 
00401   r = add_opcode(reg, OP_CALL);
00402   if (r) return r;
00403   r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
00404                           node->target);
00405   if (r) return r;
00406   r = add_abs_addr(reg, 0 /*dummy addr.*/);
00407   return r;
00408 }
00409 #endif
00410 
00411 static int
00412 compile_tree_n_times(Node* node, int n, regex_t* reg)
00413 {
00414   int i, r;
00415 
00416   for (i = 0; i < n; i++) {
00417     r = compile_tree(node, reg);
00418     if (r) return r;
00419   }
00420   return 0;
00421 }
00422 
00423 static int
00424 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
00425                           regex_t* reg ARG_UNUSED, int ignore_case)
00426 {
00427   int len;
00428   int op = select_str_opcode(mb_len, str_len, ignore_case);
00429 
00430   len = SIZE_OPCODE;
00431 
00432   if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
00433   if (IS_NEED_STR_LEN_OP_EXACT(op))
00434     len += SIZE_LENGTH;
00435 
00436   len += mb_len * str_len;
00437   return len;
00438 }
00439 
00440 static int
00441 add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
00442                    regex_t* reg, int ignore_case)
00443 {
00444   int op = select_str_opcode(mb_len, str_len, ignore_case);
00445   add_opcode(reg, op);
00446 
00447   if (op == OP_EXACTMBN)
00448     add_length(reg, mb_len);
00449 
00450   if (IS_NEED_STR_LEN_OP_EXACT(op)) {
00451     if (op == OP_EXACTN_IC)
00452       add_length(reg, mb_len * str_len);
00453     else
00454       add_length(reg, str_len);
00455   }
00456 
00457   add_bytes(reg, s, mb_len * str_len);
00458   return 0;
00459 }
00460 
00461 
00462 static int
00463 compile_length_string_node(Node* node, regex_t* reg)
00464 {
00465   int rlen, r, len, prev_len, slen, ambig;
00466   OnigEncoding enc = reg->enc;
00467   UChar *p, *prev;
00468   StrNode* sn;
00469 
00470   sn = NSTR(node);
00471   if (sn->end <= sn->s)
00472     return 0;
00473 
00474   ambig = NSTRING_IS_AMBIG(node);
00475 
00476   p = prev = sn->s;
00477   prev_len = enclen(enc, p, sn->end);
00478   p += prev_len;
00479   slen = 1;
00480   rlen = 0;
00481 
00482   for (; p < sn->end; ) {
00483     len = enclen(enc, p, sn->end);
00484     if (len == prev_len) {
00485       slen++;
00486     }
00487     else {
00488       r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00489       rlen += r;
00490       prev = p;
00491       slen = 1;
00492       prev_len = len;
00493     }
00494     p += len;
00495   }
00496   r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00497   rlen += r;
00498   return rlen;
00499 }
00500 
00501 static int
00502 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
00503 {
00504   if (sn->end <= sn->s)
00505     return 0;
00506 
00507   return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
00508 }
00509 
00510 static int
00511 compile_string_node(Node* node, regex_t* reg)
00512 {
00513   int r, len, prev_len, slen, ambig;
00514   OnigEncoding enc = reg->enc;
00515   UChar *p, *prev, *end;
00516   StrNode* sn;
00517 
00518   sn = NSTR(node);
00519   if (sn->end <= sn->s)
00520     return 0;
00521 
00522   end = sn->end;
00523   ambig = NSTRING_IS_AMBIG(node);
00524 
00525   p = prev = sn->s;
00526   prev_len = enclen(enc, p, end);
00527   p += prev_len;
00528   slen = 1;
00529 
00530   for (; p < end; ) {
00531     len = enclen(enc, p, end);
00532     if (len == prev_len) {
00533       slen++;
00534     }
00535     else {
00536       r = add_compile_string(prev, prev_len, slen, reg, ambig);
00537       if (r) return r;
00538 
00539       prev  = p;
00540       slen  = 1;
00541       prev_len = len;
00542     }
00543 
00544     p += len;
00545   }
00546   return add_compile_string(prev, prev_len, slen, reg, ambig);
00547 }
00548 
00549 static int
00550 compile_string_raw_node(StrNode* sn, regex_t* reg)
00551 {
00552   if (sn->end <= sn->s)
00553     return 0;
00554 
00555   return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
00556 }
00557 
00558 static int
00559 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
00560 {
00561 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00562   add_length(reg, mbuf->used);
00563   return add_bytes(reg, mbuf->p, mbuf->used);
00564 #else
00565   int r, pad_size;
00566   UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
00567 
00568   GET_ALIGNMENT_PAD_SIZE(p, pad_size);
00569   add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
00570   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00571 
00572   r = add_bytes(reg, mbuf->p, mbuf->used);
00573 
00574   /* padding for return value from compile_length_cclass_node() to be fix. */
00575   pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
00576   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00577   return r;
00578 #endif
00579 }
00580 
00581 static int
00582 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
00583 {
00584   int len;
00585 
00586   if (IS_NCCLASS_SHARE(cc)) {
00587     len = SIZE_OPCODE + SIZE_POINTER;
00588     return len;
00589   }
00590 
00591   if (IS_NULL(cc->mbuf)) {
00592     len = SIZE_OPCODE + SIZE_BITSET;
00593   }
00594   else {
00595     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00596       len = SIZE_OPCODE;
00597     }
00598     else {
00599       len = SIZE_OPCODE + SIZE_BITSET;
00600     }
00601 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00602     len += SIZE_LENGTH + cc->mbuf->used;
00603 #else
00604     len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
00605 #endif
00606   }
00607 
00608   return len;
00609 }
00610 
00611 static int
00612 compile_cclass_node(CClassNode* cc, regex_t* reg)
00613 {
00614   int r;
00615 
00616   if (IS_NCCLASS_SHARE(cc)) {
00617     add_opcode(reg, OP_CCLASS_NODE);
00618     r = add_pointer(reg, cc);
00619     return r;
00620   }
00621 
00622   if (IS_NULL(cc->mbuf)) {
00623     if (IS_NCCLASS_NOT(cc))
00624       add_opcode(reg, OP_CCLASS_NOT);
00625     else
00626       add_opcode(reg, OP_CCLASS);
00627 
00628     r = add_bitset(reg, cc->bs);
00629   }
00630   else {
00631     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00632       if (IS_NCCLASS_NOT(cc))
00633         add_opcode(reg, OP_CCLASS_MB_NOT);
00634       else
00635         add_opcode(reg, OP_CCLASS_MB);
00636 
00637       r = add_multi_byte_cclass(cc->mbuf, reg);
00638     }
00639     else {
00640       if (IS_NCCLASS_NOT(cc))
00641         add_opcode(reg, OP_CCLASS_MIX_NOT);
00642       else
00643         add_opcode(reg, OP_CCLASS_MIX);
00644 
00645       r = add_bitset(reg, cc->bs);
00646       if (r) return r;
00647       r = add_multi_byte_cclass(cc->mbuf, reg);
00648     }
00649   }
00650 
00651   return r;
00652 }
00653 
00654 static int
00655 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
00656 {
00657 #define REPEAT_RANGE_ALLOC  4
00658 
00659   OnigRepeatRange* p;
00660 
00661   if (reg->repeat_range_alloc == 0) {
00662     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
00663     CHECK_NULL_RETURN_MEMERR(p);
00664     reg->repeat_range = p;
00665     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
00666   }
00667   else if (reg->repeat_range_alloc <= id) {
00668     int n;
00669     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
00670     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
00671                                     sizeof(OnigRepeatRange) * n);
00672     CHECK_NULL_RETURN_MEMERR(p);
00673     reg->repeat_range = p;
00674     reg->repeat_range_alloc = n;
00675   }
00676   else {
00677     p = reg->repeat_range;
00678   }
00679 
00680   p[id].lower = lower;
00681   p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
00682   return 0;
00683 }
00684 
00685 static int
00686 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
00687                           regex_t* reg)
00688 {
00689   int r;
00690   int num_repeat = reg->num_repeat;
00691 
00692   r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
00693   if (r) return r;
00694   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
00695   reg->num_repeat++;
00696   if (r) return r;
00697   r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
00698   if (r) return r;
00699 
00700   r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
00701   if (r) return r;
00702 
00703   r = compile_tree_empty_check(qn->target, reg, empty_info);
00704   if (r) return r;
00705 
00706   if (
00707 #ifdef USE_SUBEXP_CALL
00708       reg->num_call > 0 ||
00709 #endif
00710       IS_QUANTIFIER_IN_REPEAT(qn)) {
00711     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
00712   }
00713   else {
00714     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
00715   }
00716   if (r) return r;
00717   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
00718   return r;
00719 }
00720 
00721 static int
00722 is_anychar_star_quantifier(QtfrNode* qn)
00723 {
00724   if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
00725       NTYPE(qn->target) == NT_CANY)
00726     return 1;
00727   else
00728     return 0;
00729 }
00730 
00731 #define QUANTIFIER_EXPAND_LIMIT_SIZE   50
00732 #define CKN_ON   (ckn > 0)
00733 
00734 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00735 
00736 static int
00737 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00738 {
00739   int len, mod_tlen, cklen;
00740   int ckn;
00741   int infinite = IS_REPEAT_INFINITE(qn->upper);
00742   int empty_info = qn->target_empty_info;
00743   int tlen = compile_length_tree(qn->target, reg);
00744 
00745   if (tlen < 0) return tlen;
00746 
00747   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00748 
00749   cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
00750 
00751   /* anychar repeat */
00752   if (NTYPE(qn->target) == NT_CANY) {
00753     if (qn->greedy && infinite) {
00754       if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
00755         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
00756       else
00757         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
00758     }
00759   }
00760 
00761   if (empty_info != 0)
00762     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00763   else
00764     mod_tlen = tlen;
00765 
00766   if (infinite && qn->lower <= 1) {
00767     if (qn->greedy) {
00768       if (qn->lower == 1)
00769         len = SIZE_OP_JUMP;
00770       else
00771         len = 0;
00772 
00773       len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
00774     }
00775     else {
00776       if (qn->lower == 0)
00777         len = SIZE_OP_JUMP;
00778       else
00779         len = 0;
00780 
00781       len += mod_tlen + SIZE_OP_PUSH + cklen;
00782     }
00783   }
00784   else if (qn->upper == 0) {
00785     if (qn->is_refered != 0) /* /(?<n>..){0}/ */
00786       len = SIZE_OP_JUMP + tlen;
00787     else
00788       len = 0;
00789   }
00790   else if (qn->upper == 1 && qn->greedy) {
00791     if (qn->lower == 0) {
00792       if (CKN_ON) {
00793         len = SIZE_OP_STATE_CHECK_PUSH + tlen;
00794       }
00795       else {
00796         len = SIZE_OP_PUSH + tlen;
00797       }
00798     }
00799     else {
00800       len = tlen;
00801     }
00802   }
00803   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
00804     len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
00805   }
00806   else {
00807     len = SIZE_OP_REPEAT_INC
00808         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
00809     if (CKN_ON)
00810       len += SIZE_OP_STATE_CHECK;
00811   }
00812 
00813   return len;
00814 }
00815 
00816 static int
00817 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
00818 {
00819   int r, mod_tlen;
00820   int ckn;
00821   int infinite = IS_REPEAT_INFINITE(qn->upper);
00822   int empty_info = qn->target_empty_info;
00823   int tlen = compile_length_tree(qn->target, reg);
00824 
00825   if (tlen < 0) return tlen;
00826 
00827   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00828 
00829   if (is_anychar_star_quantifier(qn)) {
00830     r = compile_tree_n_times(qn->target, qn->lower, reg);
00831     if (r) return r;
00832     if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
00833       if (IS_MULTILINE(reg->options))
00834         r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
00835       else
00836         r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
00837       if (r) return r;
00838       if (CKN_ON) {
00839         r = add_state_check_num(reg, ckn);
00840         if (r) return r;
00841       }
00842 
00843       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
00844     }
00845     else {
00846       if (IS_MULTILINE(reg->options)) {
00847         r = add_opcode(reg, (CKN_ON ?
00848                                OP_STATE_CHECK_ANYCHAR_ML_STAR
00849                              : OP_ANYCHAR_ML_STAR));
00850       }
00851       else {
00852         r = add_opcode(reg, (CKN_ON ?
00853                                OP_STATE_CHECK_ANYCHAR_STAR
00854                              : OP_ANYCHAR_STAR));
00855       }
00856       if (r) return r;
00857       if (CKN_ON)
00858         r = add_state_check_num(reg, ckn);
00859 
00860       return r;
00861     }
00862   }
00863 
00864   if (empty_info != 0)
00865     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00866   else
00867     mod_tlen = tlen;
00868 
00869   if (infinite && qn->lower <= 1) {
00870     if (qn->greedy) {
00871       if (qn->lower == 1) {
00872         r = add_opcode_rel_addr(reg, OP_JUMP,
00873                         (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
00874         if (r) return r;
00875       }
00876 
00877       if (CKN_ON) {
00878         r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00879         if (r) return r;
00880         r = add_state_check_num(reg, ckn);
00881         if (r) return r;
00882         r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
00883       }
00884       else {
00885         r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
00886       }
00887       if (r) return r;
00888       r = compile_tree_empty_check(qn->target, reg, empty_info);
00889       if (r) return r;
00890       r = add_opcode_rel_addr(reg, OP_JUMP,
00891               -(mod_tlen + (int )SIZE_OP_JUMP
00892                 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
00893     }
00894     else {
00895       if (qn->lower == 0) {
00896         r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
00897         if (r) return r;
00898       }
00899       r = compile_tree_empty_check(qn->target, reg, empty_info);
00900       if (r) return r;
00901       if (CKN_ON) {
00902         r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
00903         if (r) return r;
00904         r = add_state_check_num(reg, ckn);
00905         if (r) return r;
00906         r = add_rel_addr(reg,
00907                  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
00908       }
00909       else
00910         r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
00911     }
00912   }
00913   else if (qn->upper == 0) {
00914     if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
00915       r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00916       if (r) return r;
00917       r = compile_tree(qn->target, reg);
00918     }
00919     else
00920       r = 0;
00921   }
00922   else if (qn->upper == 1 && qn->greedy) {
00923     if (qn->lower == 0) {
00924       if (CKN_ON) {
00925         r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00926         if (r) return r;
00927         r = add_state_check_num(reg, ckn);
00928         if (r) return r;
00929         r = add_rel_addr(reg, tlen);
00930       }
00931       else {
00932         r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
00933       }
00934       if (r) return r;
00935     }
00936 
00937     r = compile_tree(qn->target, reg);
00938   }
00939   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
00940     if (CKN_ON) {
00941       r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00942       if (r) return r;
00943       r = add_state_check_num(reg, ckn);
00944       if (r) return r;
00945       r = add_rel_addr(reg, SIZE_OP_JUMP);
00946     }
00947     else {
00948       r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
00949     }
00950 
00951     if (r) return r;
00952     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00953     if (r) return r;
00954     r = compile_tree(qn->target, reg);
00955   }
00956   else {
00957     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
00958     if (CKN_ON) {
00959       if (r) return r;
00960       r = add_opcode(reg, OP_STATE_CHECK);
00961       if (r) return r;
00962       r = add_state_check_num(reg, ckn);
00963     }
00964   }
00965   return r;
00966 }
00967 
00968 #else /* USE_COMBINATION_EXPLOSION_CHECK */
00969 
00970 static int
00971 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00972 {
00973   int len, mod_tlen;
00974   int infinite = IS_REPEAT_INFINITE(qn->upper);
00975   int empty_info = qn->target_empty_info;
00976   int tlen = compile_length_tree(qn->target, reg);
00977 
00978   if (tlen < 0) return tlen;
00979 
00980   /* anychar repeat */
00981   if (NTYPE(qn->target) == NT_CANY) {
00982     if (qn->greedy && infinite) {
00983       if (IS_NOT_NULL(qn->next_head_exact))
00984         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
00985       else
00986         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
00987     }
00988   }
00989 
00990   if (empty_info != 0)
00991     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00992   else
00993     mod_tlen = tlen;
00994 
00995   if (infinite &&
00996       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
00997     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
00998       len = SIZE_OP_JUMP;
00999     }
01000     else {
01001       len = tlen * qn->lower;
01002     }
01003 
01004     if (qn->greedy) {
01005       if (IS_NOT_NULL(qn->head_exact))
01006         len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
01007       else if (IS_NOT_NULL(qn->next_head_exact))
01008         len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
01009       else
01010         len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
01011     }
01012     else
01013       len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
01014   }
01015   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
01016     len = SIZE_OP_JUMP + tlen;
01017   }
01018   else if (!infinite && qn->greedy &&
01019            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01020                                       <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01021     len = tlen * qn->lower;
01022     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
01023   }
01024   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
01025     len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
01026   }
01027   else {
01028     len = SIZE_OP_REPEAT_INC
01029         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
01030   }
01031 
01032   return len;
01033 }
01034 
01035 static int
01036 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
01037 {
01038   int i, r, mod_tlen;
01039   int infinite = IS_REPEAT_INFINITE(qn->upper);
01040   int empty_info = qn->target_empty_info;
01041   int tlen = compile_length_tree(qn->target, reg);
01042 
01043   if (tlen < 0) return tlen;
01044 
01045   if (is_anychar_star_quantifier(qn)) {
01046     r = compile_tree_n_times(qn->target, qn->lower, reg);
01047     if (r) return r;
01048     if (IS_NOT_NULL(qn->next_head_exact)) {
01049       if (IS_MULTILINE(reg->options))
01050         r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
01051       else
01052         r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
01053       if (r) return r;
01054       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01055     }
01056     else {
01057       if (IS_MULTILINE(reg->options))
01058         return add_opcode(reg, OP_ANYCHAR_ML_STAR);
01059       else
01060         return add_opcode(reg, OP_ANYCHAR_STAR);
01061     }
01062   }
01063 
01064   if (empty_info != 0)
01065     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
01066   else
01067     mod_tlen = tlen;
01068 
01069   if (infinite &&
01070       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01071     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01072       if (qn->greedy) {
01073         if (IS_NOT_NULL(qn->head_exact))
01074           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
01075         else if (IS_NOT_NULL(qn->next_head_exact))
01076           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
01077         else
01078           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
01079       }
01080       else {
01081         r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
01082       }
01083       if (r) return r;
01084     }
01085     else {
01086       r = compile_tree_n_times(qn->target, qn->lower, reg);
01087       if (r) return r;
01088     }
01089 
01090     if (qn->greedy) {
01091       if (IS_NOT_NULL(qn->head_exact)) {
01092         r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
01093                              mod_tlen + SIZE_OP_JUMP);
01094         if (r) return r;
01095         add_bytes(reg, NSTR(qn->head_exact)->s, 1);
01096         r = compile_tree_empty_check(qn->target, reg, empty_info);
01097         if (r) return r;
01098         r = add_opcode_rel_addr(reg, OP_JUMP,
01099         -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
01100       }
01101       else if (IS_NOT_NULL(qn->next_head_exact)) {
01102         r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
01103                                 mod_tlen + SIZE_OP_JUMP);
01104         if (r) return r;
01105         add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01106         r = compile_tree_empty_check(qn->target, reg, empty_info);
01107         if (r) return r;
01108         r = add_opcode_rel_addr(reg, OP_JUMP,
01109           -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
01110       }
01111       else {
01112         r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
01113         if (r) return r;
01114         r = compile_tree_empty_check(qn->target, reg, empty_info);
01115         if (r) return r;
01116         r = add_opcode_rel_addr(reg, OP_JUMP,
01117                      -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
01118       }
01119     }
01120     else {
01121       r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
01122       if (r) return r;
01123       r = compile_tree_empty_check(qn->target, reg, empty_info);
01124       if (r) return r;
01125       r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
01126     }
01127   }
01128   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
01129     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01130     if (r) return r;
01131     r = compile_tree(qn->target, reg);
01132   }
01133   else if (!infinite && qn->greedy &&
01134            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01135                                   <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01136     int n = qn->upper - qn->lower;
01137 
01138     r = compile_tree_n_times(qn->target, qn->lower, reg);
01139     if (r) return r;
01140 
01141     for (i = 0; i < n; i++) {
01142       r = add_opcode_rel_addr(reg, OP_PUSH,
01143                            (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
01144       if (r) return r;
01145       r = compile_tree(qn->target, reg);
01146       if (r) return r;
01147     }
01148   }
01149   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
01150     r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
01151     if (r) return r;
01152     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01153     if (r) return r;
01154     r = compile_tree(qn->target, reg);
01155   }
01156   else {
01157     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
01158   }
01159   return r;
01160 }
01161 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
01162 
01163 static int
01164 compile_length_option_node(EncloseNode* node, regex_t* reg)
01165 {
01166   int tlen;
01167   OnigOptionType prev = reg->options;
01168 
01169   reg->options = node->option;
01170   tlen = compile_length_tree(node->target, reg);
01171   reg->options = prev;
01172 
01173   if (tlen < 0) return tlen;
01174 
01175   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01176     return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
01177            + tlen + SIZE_OP_SET_OPTION;
01178   }
01179   else
01180     return tlen;
01181 }
01182 
01183 static int
01184 compile_option_node(EncloseNode* node, regex_t* reg)
01185 {
01186   int r;
01187   OnigOptionType prev = reg->options;
01188 
01189   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01190     r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
01191     if (r) return r;
01192     r = add_opcode_option(reg, OP_SET_OPTION, prev);
01193     if (r) return r;
01194     r = add_opcode(reg, OP_FAIL);
01195     if (r) return r;
01196   }
01197 
01198   reg->options = node->option;
01199   r = compile_tree(node->target, reg);
01200   reg->options = prev;
01201 
01202   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01203     if (r) return r;
01204     r = add_opcode_option(reg, OP_SET_OPTION, prev);
01205   }
01206   return r;
01207 }
01208 
01209 static int
01210 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
01211 {
01212   int len;
01213   int tlen;
01214 
01215   if (node->type == ENCLOSE_OPTION)
01216     return compile_length_option_node(node, reg);
01217 
01218   if (node->target) {
01219     tlen = compile_length_tree(node->target, reg);
01220     if (tlen < 0) return tlen;
01221   }
01222   else
01223     tlen = 0;
01224 
01225   switch (node->type) {
01226   case ENCLOSE_MEMORY:
01227 #ifdef USE_SUBEXP_CALL
01228     if (IS_ENCLOSE_CALLED(node)) {
01229       len = SIZE_OP_MEMORY_START_PUSH + tlen
01230           + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
01231       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01232         len += (IS_ENCLOSE_RECURSION(node)
01233                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01234       else
01235         len += (IS_ENCLOSE_RECURSION(node)
01236                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01237     }
01238     else
01239 #endif
01240     {
01241       if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01242         len = SIZE_OP_MEMORY_START_PUSH;
01243       else
01244         len = SIZE_OP_MEMORY_START;
01245 
01246       len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
01247                      ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
01248     }
01249     break;
01250 
01251   case ENCLOSE_STOP_BACKTRACK:
01252     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01253       QtfrNode* qn = NQTFR(node->target);
01254       tlen = compile_length_tree(qn->target, reg);
01255       if (tlen < 0) return tlen;
01256 
01257       len = tlen * qn->lower
01258           + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
01259     }
01260     else {
01261       len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
01262     }
01263     break;
01264 
01265   default:
01266     return ONIGERR_TYPE_BUG;
01267     break;
01268   }
01269 
01270   return len;
01271 }
01272 
01273 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
01274 
01275 static int
01276 compile_enclose_node(EncloseNode* node, regex_t* reg)
01277 {
01278   int r, len;
01279 
01280   if (node->type == ENCLOSE_OPTION)
01281     return compile_option_node(node, reg);
01282 
01283   switch (node->type) {
01284   case ENCLOSE_MEMORY:
01285 #ifdef USE_SUBEXP_CALL
01286     if (IS_ENCLOSE_CALLED(node)) {
01287       r = add_opcode(reg, OP_CALL);
01288       if (r) return r;
01289       node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
01290       node->state |= NST_ADDR_FIXED;
01291       r = add_abs_addr(reg, (int )node->call_addr);
01292       if (r) return r;
01293       len = compile_length_tree(node->target, reg);
01294       len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
01295       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01296         len += (IS_ENCLOSE_RECURSION(node)
01297                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01298       else
01299         len += (IS_ENCLOSE_RECURSION(node)
01300                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01301 
01302       r = add_opcode_rel_addr(reg, OP_JUMP, len);
01303       if (r) return r;
01304     }
01305 #endif
01306     if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01307       r = add_opcode(reg, OP_MEMORY_START_PUSH);
01308     else
01309       r = add_opcode(reg, OP_MEMORY_START);
01310     if (r) return r;
01311     r = add_mem_num(reg, node->regnum);
01312     if (r) return r;
01313     r = compile_tree(node->target, reg);
01314     if (r) return r;
01315 #ifdef USE_SUBEXP_CALL
01316     if (IS_ENCLOSE_CALLED(node)) {
01317       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01318         r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01319                              ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
01320       else
01321         r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01322                              ? OP_MEMORY_END_REC : OP_MEMORY_END));
01323 
01324       if (r) return r;
01325       r = add_mem_num(reg, node->regnum);
01326       if (r) return r;
01327       r = add_opcode(reg, OP_RETURN);
01328     }
01329     else
01330 #endif
01331     {
01332       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01333         r = add_opcode(reg, OP_MEMORY_END_PUSH);
01334       else
01335         r = add_opcode(reg, OP_MEMORY_END);
01336       if (r) return r;
01337       r = add_mem_num(reg, node->regnum);
01338     }
01339     break;
01340 
01341   case ENCLOSE_STOP_BACKTRACK:
01342     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01343       QtfrNode* qn = NQTFR(node->target);
01344       r = compile_tree_n_times(qn->target, qn->lower, reg);
01345       if (r) return r;
01346 
01347       len = compile_length_tree(qn->target, reg);
01348       if (len < 0) return len;
01349 
01350       r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
01351       if (r) return r;
01352       r = compile_tree(qn->target, reg);
01353       if (r) return r;
01354       r = add_opcode(reg, OP_POP);
01355       if (r) return r;
01356       r = add_opcode_rel_addr(reg, OP_JUMP,
01357          -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
01358     }
01359     else {
01360       r = add_opcode(reg, OP_PUSH_STOP_BT);
01361       if (r) return r;
01362       r = compile_tree(node->target, reg);
01363       if (r) return r;
01364       r = add_opcode(reg, OP_POP_STOP_BT);
01365     }
01366     break;
01367 
01368   default:
01369     return ONIGERR_TYPE_BUG;
01370     break;
01371   }
01372 
01373   return r;
01374 }
01375 
01376 static int
01377 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
01378 {
01379   int len;
01380   int tlen = 0;
01381 
01382   if (node->target) {
01383     tlen = compile_length_tree(node->target, reg);
01384     if (tlen < 0) return tlen;
01385   }
01386 
01387   switch (node->type) {
01388   case ANCHOR_PREC_READ:
01389     len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
01390     break;
01391   case ANCHOR_PREC_READ_NOT:
01392     len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
01393     break;
01394   case ANCHOR_LOOK_BEHIND:
01395     len = SIZE_OP_LOOK_BEHIND + tlen;
01396     break;
01397   case ANCHOR_LOOK_BEHIND_NOT:
01398     len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
01399     break;
01400 
01401   default:
01402     len = SIZE_OPCODE;
01403     break;
01404   }
01405 
01406   return len;
01407 }
01408 
01409 static int
01410 compile_anchor_node(AnchorNode* node, regex_t* reg)
01411 {
01412   int r, len;
01413 
01414   switch (node->type) {
01415   case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
01416   case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
01417   case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
01418   case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
01419   case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
01420   case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
01421 
01422   case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break;
01423   case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
01424 #ifdef USE_WORD_BEGIN_END
01425   case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break;
01426   case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break;
01427 #endif
01428 
01429   case ANCHOR_PREC_READ:
01430     r = add_opcode(reg, OP_PUSH_POS);
01431     if (r) return r;
01432     r = compile_tree(node->target, reg);
01433     if (r) return r;
01434     r = add_opcode(reg, OP_POP_POS);
01435     break;
01436 
01437   case ANCHOR_PREC_READ_NOT:
01438     len = compile_length_tree(node->target, reg);
01439     if (len < 0) return len;
01440     r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
01441     if (r) return r;
01442     r = compile_tree(node->target, reg);
01443     if (r) return r;
01444     r = add_opcode(reg, OP_FAIL_POS);
01445     break;
01446 
01447   case ANCHOR_LOOK_BEHIND:
01448     {
01449       int n;
01450       r = add_opcode(reg, OP_LOOK_BEHIND);
01451       if (r) return r;
01452       if (node->char_len < 0) {
01453         r = get_char_length_tree(node->target, reg, &n);
01454         if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01455       }
01456       else
01457         n = node->char_len;
01458       r = add_length(reg, n);
01459       if (r) return r;
01460       r = compile_tree(node->target, reg);
01461     }
01462     break;
01463 
01464   case ANCHOR_LOOK_BEHIND_NOT:
01465     {
01466       int n;
01467       len = compile_length_tree(node->target, reg);
01468       r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
01469                            len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
01470       if (r) return r;
01471       if (node->char_len < 0) {
01472         r = get_char_length_tree(node->target, reg, &n);
01473         if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01474       }
01475       else
01476         n = node->char_len;
01477       r = add_length(reg, n);
01478       if (r) return r;
01479       r = compile_tree(node->target, reg);
01480       if (r) return r;
01481       r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
01482     }
01483     break;
01484 
01485   default:
01486     return ONIGERR_TYPE_BUG;
01487     break;
01488   }
01489 
01490   return r;
01491 }
01492 
01493 static int
01494 compile_length_tree(Node* node, regex_t* reg)
01495 {
01496   int len, type, r;
01497 
01498   type = NTYPE(node);
01499   switch (type) {
01500   case NT_LIST:
01501     len = 0;
01502     do {
01503       r = compile_length_tree(NCAR(node), reg);
01504       if (r < 0) return r;
01505       len += r;
01506     } while (IS_NOT_NULL(node = NCDR(node)));
01507     r = len;
01508     break;
01509 
01510   case NT_ALT:
01511     {
01512       int n;
01513 
01514       n = r = 0;
01515       do {
01516         r += compile_length_tree(NCAR(node), reg);
01517         n++;
01518       } while (IS_NOT_NULL(node = NCDR(node)));
01519       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
01520     }
01521     break;
01522 
01523   case NT_STR:
01524     if (NSTRING_IS_RAW(node))
01525       r = compile_length_string_raw_node(NSTR(node), reg);
01526     else
01527       r = compile_length_string_node(node, reg);
01528     break;
01529 
01530   case NT_CCLASS:
01531     r = compile_length_cclass_node(NCCLASS(node), reg);
01532     break;
01533 
01534   case NT_CTYPE:
01535   case NT_CANY:
01536     r = SIZE_OPCODE;
01537     break;
01538 
01539   case NT_BREF:
01540     {
01541       BRefNode* br = NBREF(node);
01542 
01543 #ifdef USE_BACKREF_WITH_LEVEL
01544       if (IS_BACKREF_NEST_LEVEL(br)) {
01545         r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
01546             SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01547       }
01548       else
01549 #endif
01550       if (br->back_num == 1) {
01551         r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
01552              ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
01553       }
01554       else {
01555         r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01556       }
01557     }
01558     break;
01559 
01560 #ifdef USE_SUBEXP_CALL
01561   case NT_CALL:
01562     r = SIZE_OP_CALL;
01563     break;
01564 #endif
01565 
01566   case NT_QTFR:
01567     r = compile_length_quantifier_node(NQTFR(node), reg);
01568     break;
01569 
01570   case NT_ENCLOSE:
01571     r = compile_length_enclose_node(NENCLOSE(node), reg);
01572     break;
01573 
01574   case NT_ANCHOR:
01575     r = compile_length_anchor_node(NANCHOR(node), reg);
01576     break;
01577 
01578   default:
01579     return ONIGERR_TYPE_BUG;
01580     break;
01581   }
01582 
01583   return r;
01584 }
01585 
01586 static int
01587 compile_tree(Node* node, regex_t* reg)
01588 {
01589   int n, type, len, pos, r = 0;
01590 
01591   type = NTYPE(node);
01592   switch (type) {
01593   case NT_LIST:
01594     do {
01595       r = compile_tree(NCAR(node), reg);
01596     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01597     break;
01598 
01599   case NT_ALT:
01600     {
01601       Node* x = node;
01602       len = 0;
01603       do {
01604         len += compile_length_tree(NCAR(x), reg);
01605         if (NCDR(x) != NULL) {
01606           len += SIZE_OP_PUSH + SIZE_OP_JUMP;
01607         }
01608       } while (IS_NOT_NULL(x = NCDR(x)));
01609       pos = reg->used + len;  /* goal position */
01610 
01611       do {
01612         len = compile_length_tree(NCAR(node), reg);
01613         if (IS_NOT_NULL(NCDR(node))) {
01614           r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
01615           if (r) break;
01616         }
01617         r = compile_tree(NCAR(node), reg);
01618         if (r) break;
01619         if (IS_NOT_NULL(NCDR(node))) {
01620           len = pos - (reg->used + SIZE_OP_JUMP);
01621           r = add_opcode_rel_addr(reg, OP_JUMP, len);
01622           if (r) break;
01623         }
01624       } while (IS_NOT_NULL(node = NCDR(node)));
01625     }
01626     break;
01627 
01628   case NT_STR:
01629     if (NSTRING_IS_RAW(node))
01630       r = compile_string_raw_node(NSTR(node), reg);
01631     else
01632       r = compile_string_node(node, reg);
01633     break;
01634 
01635   case NT_CCLASS:
01636     r = compile_cclass_node(NCCLASS(node), reg);
01637     break;
01638 
01639   case NT_CTYPE:
01640     {
01641       int op;
01642 
01643       switch (NCTYPE(node)->ctype) {
01644       case ONIGENC_CTYPE_WORD:
01645         if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
01646         else                         op = OP_WORD;
01647         break;
01648       default:
01649         return ONIGERR_TYPE_BUG;
01650         break;
01651       }
01652       r = add_opcode(reg, op);
01653     }
01654     break;
01655 
01656   case NT_CANY:
01657     if (IS_MULTILINE(reg->options))
01658       r = add_opcode(reg, OP_ANYCHAR_ML);
01659     else
01660       r = add_opcode(reg, OP_ANYCHAR);
01661     break;
01662 
01663   case NT_BREF:
01664     {
01665       BRefNode* br = NBREF(node);
01666 
01667 #ifdef USE_BACKREF_WITH_LEVEL
01668       if (IS_BACKREF_NEST_LEVEL(br)) {
01669         r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
01670         if (r) return r;
01671         r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
01672         if (r) return r;
01673         r = add_length(reg, br->nest_level);
01674         if (r) return r;
01675 
01676         goto add_bacref_mems;
01677       }
01678       else
01679 #endif
01680       if (br->back_num == 1) {
01681         n = br->back_static[0];
01682         if (IS_IGNORECASE(reg->options)) {
01683           r = add_opcode(reg, OP_BACKREFN_IC);
01684           if (r) return r;
01685           r = add_mem_num(reg, n);
01686         }
01687         else {
01688           switch (n) {
01689           case 1:  r = add_opcode(reg, OP_BACKREF1); break;
01690           case 2:  r = add_opcode(reg, OP_BACKREF2); break;
01691           default:
01692             r = add_opcode(reg, OP_BACKREFN);
01693             if (r) return r;
01694             r = add_mem_num(reg, n);
01695             break;
01696           }
01697         }
01698       }
01699       else {
01700         int i;
01701         int* p;
01702 
01703         if (IS_IGNORECASE(reg->options)) {
01704           r = add_opcode(reg, OP_BACKREF_MULTI_IC);
01705         }
01706         else {
01707           r = add_opcode(reg, OP_BACKREF_MULTI);
01708         }
01709         if (r) return r;
01710 
01711 #ifdef USE_BACKREF_WITH_LEVEL
01712       add_bacref_mems:
01713 #endif
01714         r = add_length(reg, br->back_num);
01715         if (r) return r;
01716         p = BACKREFS_P(br);
01717         for (i = br->back_num - 1; i >= 0; i--) {
01718           r = add_mem_num(reg, p[i]);
01719           if (r) return r;
01720         }
01721       }
01722     }
01723     break;
01724 
01725 #ifdef USE_SUBEXP_CALL
01726   case NT_CALL:
01727     r = compile_call(NCALL(node), reg);
01728     break;
01729 #endif
01730 
01731   case NT_QTFR:
01732     r = compile_quantifier_node(NQTFR(node), reg);
01733     break;
01734 
01735   case NT_ENCLOSE:
01736     r = compile_enclose_node(NENCLOSE(node), reg);
01737     break;
01738 
01739   case NT_ANCHOR:
01740     r = compile_anchor_node(NANCHOR(node), reg);
01741     break;
01742 
01743   default:
01744 #ifdef ONIG_DEBUG
01745     fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
01746 #endif
01747     break;
01748   }
01749 
01750   return r;
01751 }
01752 
01753 #ifdef USE_NAMED_GROUP
01754 
01755 static int
01756 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
01757 {
01758   int r = 0;
01759   Node* node = *plink;
01760 
01761   switch (NTYPE(node)) {
01762   case NT_LIST:
01763   case NT_ALT:
01764     do {
01765       r = noname_disable_map(&(NCAR(node)), map, counter);
01766     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01767     break;
01768 
01769   case NT_QTFR:
01770     {
01771       Node** ptarget = &(NQTFR(node)->target);
01772       Node*  old = *ptarget;
01773       r = noname_disable_map(ptarget, map, counter);
01774       if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
01775         onig_reduce_nested_quantifier(node, *ptarget);
01776       }
01777     }
01778     break;
01779 
01780   case NT_ENCLOSE:
01781     {
01782       EncloseNode* en = NENCLOSE(node);
01783       if (en->type == ENCLOSE_MEMORY) {
01784         if (IS_ENCLOSE_NAMED_GROUP(en)) {
01785           (*counter)++;
01786           map[en->regnum].new_val = *counter;
01787           en->regnum = *counter;
01788           r = noname_disable_map(&(en->target), map, counter);
01789         }
01790         else {
01791           *plink = en->target;
01792           en->target = NULL_NODE;
01793           onig_node_free(node);
01794           r = noname_disable_map(plink, map, counter);
01795         }
01796       }
01797       else
01798         r = noname_disable_map(&(en->target), map, counter);
01799     }
01800     break;
01801 
01802   case NT_ANCHOR:
01803     {
01804       AnchorNode* an = NANCHOR(node);
01805       switch (an->type) {
01806       case ANCHOR_PREC_READ:
01807       case ANCHOR_PREC_READ_NOT:
01808       case ANCHOR_LOOK_BEHIND:
01809       case ANCHOR_LOOK_BEHIND_NOT:
01810         r = noname_disable_map(&(an->target), map, counter);
01811         break;
01812       }
01813     }
01814     break;
01815 
01816   default:
01817     break;
01818   }
01819 
01820   return r;
01821 }
01822 
01823 static int
01824 renumber_node_backref(Node* node, GroupNumRemap* map)
01825 {
01826   int i, pos, n, old_num;
01827   int *backs;
01828   BRefNode* bn = NBREF(node);
01829 
01830   if (! IS_BACKREF_NAME_REF(bn))
01831     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01832 
01833   old_num = bn->back_num;
01834   if (IS_NULL(bn->back_dynamic))
01835     backs = bn->back_static;
01836   else
01837     backs = bn->back_dynamic;
01838 
01839   for (i = 0, pos = 0; i < old_num; i++) {
01840     n = map[backs[i]].new_val;
01841     if (n > 0) {
01842       backs[pos] = n;
01843       pos++;
01844     }
01845   }
01846 
01847   bn->back_num = pos;
01848   return 0;
01849 }
01850 
01851 static int
01852 renumber_by_map(Node* node, GroupNumRemap* map)
01853 {
01854   int r = 0;
01855 
01856   switch (NTYPE(node)) {
01857   case NT_LIST:
01858   case NT_ALT:
01859     do {
01860       r = renumber_by_map(NCAR(node), map);
01861     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01862     break;
01863   case NT_QTFR:
01864     r = renumber_by_map(NQTFR(node)->target, map);
01865     break;
01866   case NT_ENCLOSE:
01867     r = renumber_by_map(NENCLOSE(node)->target, map);
01868     break;
01869 
01870   case NT_BREF:
01871     r = renumber_node_backref(node, map);
01872     break;
01873 
01874   case NT_ANCHOR:
01875     {
01876       AnchorNode* an = NANCHOR(node);
01877       switch (an->type) {
01878       case ANCHOR_PREC_READ:
01879       case ANCHOR_PREC_READ_NOT:
01880       case ANCHOR_LOOK_BEHIND:
01881       case ANCHOR_LOOK_BEHIND_NOT:
01882         r = renumber_by_map(an->target, map);
01883         break;
01884       }
01885     }
01886     break;
01887 
01888   default:
01889     break;
01890   }
01891 
01892   return r;
01893 }
01894 
01895 static int
01896 numbered_ref_check(Node* node)
01897 {
01898   int r = 0;
01899 
01900   switch (NTYPE(node)) {
01901   case NT_LIST:
01902   case NT_ALT:
01903     do {
01904       r = numbered_ref_check(NCAR(node));
01905     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01906     break;
01907   case NT_QTFR:
01908     r = numbered_ref_check(NQTFR(node)->target);
01909     break;
01910   case NT_ENCLOSE:
01911     r = numbered_ref_check(NENCLOSE(node)->target);
01912     break;
01913 
01914   case NT_BREF:
01915     if (! IS_BACKREF_NAME_REF(NBREF(node)))
01916       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01917     break;
01918 
01919   default:
01920     break;
01921   }
01922 
01923   return r;
01924 }
01925 
01926 static int
01927 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
01928 {
01929   int r, i, pos, counter;
01930   BitStatusType loc;
01931   GroupNumRemap* map;
01932 
01933   map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
01934   CHECK_NULL_RETURN_MEMERR(map);
01935   for (i = 1; i <= env->num_mem; i++) {
01936     map[i].new_val = 0;
01937   }
01938   counter = 0;
01939   r = noname_disable_map(root, map, &counter);
01940   if (r != 0) return r;
01941 
01942   r = renumber_by_map(*root, map);
01943   if (r != 0) return r;
01944 
01945   for (i = 1, pos = 1; i <= env->num_mem; i++) {
01946     if (map[i].new_val > 0) {
01947       SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
01948       pos++;
01949     }
01950   }
01951 
01952   loc = env->capture_history;
01953   BIT_STATUS_CLEAR(env->capture_history);
01954   for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
01955     if (BIT_STATUS_AT(loc, i)) {
01956       BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
01957     }
01958   }
01959 
01960   env->num_mem = env->num_named;
01961   reg->num_mem = env->num_named;
01962 
01963   return onig_renumber_name_table(reg, map);
01964 }
01965 #endif /* USE_NAMED_GROUP */
01966 
01967 #ifdef USE_SUBEXP_CALL
01968 static int
01969 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
01970 {
01971   int i, offset;
01972   EncloseNode* en;
01973   AbsAddrType addr;
01974 
01975   for (i = 0; i < uslist->num; i++) {
01976     en = NENCLOSE(uslist->us[i].target);
01977     if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
01978     addr = en->call_addr;
01979     offset = uslist->us[i].offset;
01980 
01981     BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
01982   }
01983   return 0;
01984 }
01985 #endif
01986 
01987 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
01988 static int
01989 quantifiers_memory_node_info(Node* node)
01990 {
01991   int r = 0;
01992 
01993   switch (NTYPE(node)) {
01994   case NT_LIST:
01995   case NT_ALT:
01996     {
01997       int v;
01998       do {
01999         v = quantifiers_memory_node_info(NCAR(node));
02000         if (v > r) r = v;
02001       } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
02002     }
02003     break;
02004 
02005 #ifdef USE_SUBEXP_CALL
02006   case NT_CALL:
02007     if (IS_CALL_RECURSION(NCALL(node))) {
02008       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
02009     }
02010     else
02011       r = quantifiers_memory_node_info(NCALL(node)->target);
02012     break;
02013 #endif
02014 
02015   case NT_QTFR:
02016     {
02017       QtfrNode* qn = NQTFR(node);
02018       if (qn->upper != 0) {
02019         r = quantifiers_memory_node_info(qn->target);
02020       }
02021     }
02022     break;
02023 
02024   case NT_ENCLOSE:
02025     {
02026       EncloseNode* en = NENCLOSE(node);
02027       switch (en->type) {
02028       case ENCLOSE_MEMORY:
02029         return NQ_TARGET_IS_EMPTY_MEM;
02030         break;
02031 
02032       case ENCLOSE_OPTION:
02033       case ENCLOSE_STOP_BACKTRACK:
02034         r = quantifiers_memory_node_info(en->target);
02035         break;
02036       default:
02037         break;
02038       }
02039     }
02040     break;
02041 
02042   case NT_BREF:
02043   case NT_STR:
02044   case NT_CTYPE:
02045   case NT_CCLASS:
02046   case NT_CANY:
02047   case NT_ANCHOR:
02048   default:
02049     break;
02050   }
02051 
02052   return r;
02053 }
02054 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
02055 
02056 static int
02057 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
02058 {
02059   OnigDistance tmin;
02060   int r = 0;
02061 
02062   *min = 0;
02063   switch (NTYPE(node)) {
02064   case NT_BREF:
02065     {
02066       int i;
02067       int* backs;
02068       Node** nodes = SCANENV_MEM_NODES(env);
02069       BRefNode* br = NBREF(node);
02070       if (br->state & NST_RECURSION) break;
02071 
02072       backs = BACKREFS_P(br);
02073       if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02074       r = get_min_match_length(nodes[backs[0]], min, env);
02075       if (r != 0) break;
02076       for (i = 1; i < br->back_num; i++) {
02077         if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02078         r = get_min_match_length(nodes[backs[i]], &tmin, env);
02079         if (r != 0) break;
02080         if (*min > tmin) *min = tmin;
02081       }
02082     }
02083     break;
02084 
02085 #ifdef USE_SUBEXP_CALL
02086   case NT_CALL:
02087     if (IS_CALL_RECURSION(NCALL(node))) {
02088       EncloseNode* en = NENCLOSE(NCALL(node)->target);
02089       if (IS_ENCLOSE_MIN_FIXED(en))
02090         *min = en->min_len;
02091     }
02092     else
02093       r = get_min_match_length(NCALL(node)->target, min, env);
02094     break;
02095 #endif
02096 
02097   case NT_LIST:
02098     do {
02099       r = get_min_match_length(NCAR(node), &tmin, env);
02100       if (r == 0) *min += tmin;
02101     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02102     break;
02103 
02104   case NT_ALT:
02105     {
02106       Node *x, *y;
02107       y = node;
02108       do {
02109         x = NCAR(y);
02110         r = get_min_match_length(x, &tmin, env);
02111         if (r != 0) break;
02112         if (y == node) *min = tmin;
02113         else if (*min > tmin) *min = tmin;
02114       } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
02115     }
02116     break;
02117 
02118   case NT_STR:
02119     {
02120       StrNode* sn = NSTR(node);
02121       *min = sn->end - sn->s;
02122     }
02123     break;
02124 
02125   case NT_CTYPE:
02126     *min = 1;
02127     break;
02128 
02129   case NT_CCLASS:
02130   case NT_CANY:
02131     *min = 1;
02132     break;
02133 
02134   case NT_QTFR:
02135     {
02136       QtfrNode* qn = NQTFR(node);
02137 
02138       if (qn->lower > 0) {
02139         r = get_min_match_length(qn->target, min, env);
02140         if (r == 0)
02141           *min = distance_multiply(*min, qn->lower);
02142       }
02143     }
02144     break;
02145 
02146   case NT_ENCLOSE:
02147     {
02148       EncloseNode* en = NENCLOSE(node);
02149       switch (en->type) {
02150       case ENCLOSE_MEMORY:
02151 #ifdef USE_SUBEXP_CALL
02152         if (IS_ENCLOSE_MIN_FIXED(en))
02153           *min = en->min_len;
02154         else {
02155           r = get_min_match_length(en->target, min, env);
02156           if (r == 0) {
02157             en->min_len = *min;
02158             SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
02159           }
02160         }
02161         break;
02162 #endif
02163       case ENCLOSE_OPTION:
02164       case ENCLOSE_STOP_BACKTRACK:
02165         r = get_min_match_length(en->target, min, env);
02166         break;
02167       }
02168     }
02169     break;
02170 
02171   case NT_ANCHOR:
02172   default:
02173     break;
02174   }
02175 
02176   return r;
02177 }
02178 
02179 static int
02180 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
02181 {
02182   OnigDistance tmax;
02183   int r = 0;
02184 
02185   *max = 0;
02186   switch (NTYPE(node)) {
02187   case NT_LIST:
02188     do {
02189       r = get_max_match_length(NCAR(node), &tmax, env);
02190       if (r == 0)
02191         *max = distance_add(*max, tmax);
02192     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02193     break;
02194 
02195   case NT_ALT:
02196     do {
02197       r = get_max_match_length(NCAR(node), &tmax, env);
02198       if (r == 0 && *max < tmax) *max = tmax;
02199     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02200     break;
02201 
02202   case NT_STR:
02203     {
02204       StrNode* sn = NSTR(node);
02205       *max = sn->end - sn->s;
02206     }
02207     break;
02208 
02209   case NT_CTYPE:
02210     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02211     break;
02212 
02213   case NT_CCLASS:
02214   case NT_CANY:
02215     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02216     break;
02217 
02218   case NT_BREF:
02219     {
02220       int i;
02221       int* backs;
02222       Node** nodes = SCANENV_MEM_NODES(env);
02223       BRefNode* br = NBREF(node);
02224       if (br->state & NST_RECURSION) {
02225         *max = ONIG_INFINITE_DISTANCE;
02226         break;
02227       }
02228       backs = BACKREFS_P(br);
02229       for (i = 0; i < br->back_num; i++) {
02230         if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02231         r = get_max_match_length(nodes[backs[i]], &tmax, env);
02232         if (r != 0) break;
02233         if (*max < tmax) *max = tmax;
02234       }
02235     }
02236     break;
02237 
02238 #ifdef USE_SUBEXP_CALL
02239   case NT_CALL:
02240     if (! IS_CALL_RECURSION(NCALL(node)))
02241       r = get_max_match_length(NCALL(node)->target, max, env);
02242     else
02243       *max = ONIG_INFINITE_DISTANCE;
02244     break;
02245 #endif
02246 
02247   case NT_QTFR:
02248     {
02249       QtfrNode* qn = NQTFR(node);
02250 
02251       if (qn->upper != 0) {
02252         r = get_max_match_length(qn->target, max, env);
02253         if (r == 0 && *max != 0) {
02254           if (! IS_REPEAT_INFINITE(qn->upper))
02255             *max = distance_multiply(*max, qn->upper);
02256           else
02257             *max = ONIG_INFINITE_DISTANCE;
02258         }
02259       }
02260     }
02261     break;
02262 
02263   case NT_ENCLOSE:
02264     {
02265       EncloseNode* en = NENCLOSE(node);
02266       switch (en->type) {
02267       case ENCLOSE_MEMORY:
02268 #ifdef USE_SUBEXP_CALL
02269         if (IS_ENCLOSE_MAX_FIXED(en))
02270           *max = en->max_len;
02271         else {
02272           r = get_max_match_length(en->target, max, env);
02273           if (r == 0) {
02274             en->max_len = *max;
02275             SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
02276           }
02277         }
02278         break;
02279 #endif
02280       case ENCLOSE_OPTION:
02281       case ENCLOSE_STOP_BACKTRACK:
02282         r = get_max_match_length(en->target, max, env);
02283         break;
02284       }
02285     }
02286     break;
02287 
02288   case NT_ANCHOR:
02289   default:
02290     break;
02291   }
02292 
02293   return r;
02294 }
02295 
02296 #define GET_CHAR_LEN_VARLEN           -1
02297 #define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
02298 
02299 /* fixed size pattern node only */
02300 static int
02301 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
02302 {
02303   int tlen;
02304   int r = 0;
02305 
02306   level++;
02307   *len = 0;
02308   switch (NTYPE(node)) {
02309   case NT_LIST:
02310     do {
02311       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02312       if (r == 0)
02313         *len = (int)distance_add(*len, tlen);
02314     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02315     break;
02316 
02317   case NT_ALT:
02318     {
02319       int tlen2;
02320       int varlen = 0;
02321 
02322       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02323       while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
02324         r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
02325         if (r == 0) {
02326           if (tlen != tlen2)
02327             varlen = 1;
02328         }
02329       }
02330       if (r == 0) {
02331         if (varlen != 0) {
02332           if (level == 1)
02333             r = GET_CHAR_LEN_TOP_ALT_VARLEN;
02334           else
02335             r = GET_CHAR_LEN_VARLEN;
02336         }
02337         else
02338           *len = tlen;
02339       }
02340     }
02341     break;
02342 
02343   case NT_STR:
02344     {
02345       StrNode* sn = NSTR(node);
02346       UChar *s = sn->s;
02347       while (s < sn->end) {
02348         s += enclen(reg->enc, s, sn->end);
02349         (*len)++;
02350       }
02351     }
02352     break;
02353 
02354   case NT_QTFR:
02355     {
02356       QtfrNode* qn = NQTFR(node);
02357       if (qn->lower == qn->upper) {
02358         r = get_char_length_tree1(qn->target, reg, &tlen, level);
02359         if (r == 0)
02360           *len = (int)distance_multiply(tlen, qn->lower);
02361       }
02362       else
02363         r = GET_CHAR_LEN_VARLEN;
02364     }
02365     break;
02366 
02367 #ifdef USE_SUBEXP_CALL
02368   case NT_CALL:
02369     if (! IS_CALL_RECURSION(NCALL(node)))
02370       r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
02371     else
02372       r = GET_CHAR_LEN_VARLEN;
02373     break;
02374 #endif
02375 
02376   case NT_CTYPE:
02377     *len = 1;
02378     break;
02379 
02380   case NT_CCLASS:
02381   case NT_CANY:
02382     *len = 1;
02383     break;
02384 
02385   case NT_ENCLOSE:
02386     {
02387       EncloseNode* en = NENCLOSE(node);
02388       switch (en->type) {
02389       case ENCLOSE_MEMORY:
02390 #ifdef USE_SUBEXP_CALL
02391         if (IS_ENCLOSE_CLEN_FIXED(en))
02392           *len = en->char_len;
02393         else {
02394           r = get_char_length_tree1(en->target, reg, len, level);
02395           if (r == 0) {
02396             en->char_len = *len;
02397             SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
02398           }
02399         }
02400         break;
02401 #endif
02402       case ENCLOSE_OPTION:
02403       case ENCLOSE_STOP_BACKTRACK:
02404         r = get_char_length_tree1(en->target, reg, len, level);
02405         break;
02406       default:
02407         break;
02408       }
02409     }
02410     break;
02411 
02412   case NT_ANCHOR:
02413     break;
02414 
02415   default:
02416     r = GET_CHAR_LEN_VARLEN;
02417     break;
02418   }
02419 
02420   return r;
02421 }
02422 
02423 static int
02424 get_char_length_tree(Node* node, regex_t* reg, int* len)
02425 {
02426   return get_char_length_tree1(node, reg, len, 0);
02427 }
02428 
02429 /* x is not included y ==>  1 : 0 */
02430 static int
02431 is_not_included(Node* x, Node* y, regex_t* reg)
02432 {
02433   int i;
02434   OnigDistance len;
02435   OnigCodePoint code;
02436   UChar *p, c;
02437   int ytype;
02438 
02439  retry:
02440   ytype = NTYPE(y);
02441   switch (NTYPE(x)) {
02442   case NT_CTYPE:
02443     {
02444       switch (ytype) {
02445       case NT_CTYPE:
02446         if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
02447             NCTYPE(y)->not   != NCTYPE(x)->not)
02448           return 1;
02449         else
02450           return 0;
02451         break;
02452 
02453       case NT_CCLASS:
02454       swap:
02455         {
02456           Node* tmp;
02457           tmp = x; x = y; y = tmp;
02458           goto retry;
02459         }
02460         break;
02461 
02462       case NT_STR:
02463         goto swap;
02464         break;
02465 
02466       default:
02467         break;
02468       }
02469     }
02470     break;
02471 
02472   case NT_CCLASS:
02473     {
02474       CClassNode* xc = NCCLASS(x);
02475       switch (ytype) {
02476       case NT_CTYPE:
02477         switch (NCTYPE(y)->ctype) {
02478         case ONIGENC_CTYPE_WORD:
02479           if (NCTYPE(y)->not == 0) {
02480             if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
02481               for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02482                 if (BITSET_AT(xc->bs, i)) {
02483                   if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
02484                 }
02485               }
02486               return 1;
02487             }
02488             return 0;
02489           }
02490           else {
02491             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02492               if (! IS_CODE_SB_WORD(reg->enc, i)) {
02493                 if (!IS_NCCLASS_NOT(xc)) {
02494                   if (BITSET_AT(xc->bs, i))
02495                     return 0;
02496                 }
02497                 else {
02498                   if (! BITSET_AT(xc->bs, i))
02499                     return 0;
02500                 }
02501               }
02502             }
02503             return 1;
02504           }
02505           break;
02506 
02507         default:
02508           break;
02509         }
02510         break;
02511 
02512       case NT_CCLASS:
02513         {
02514           int v;
02515           CClassNode* yc = NCCLASS(y);
02516 
02517           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02518             v = BITSET_AT(xc->bs, i);
02519             if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
02520                 (v == 0 && IS_NCCLASS_NOT(xc))) {
02521               v = BITSET_AT(yc->bs, i);
02522               if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
02523                   (v == 0 && IS_NCCLASS_NOT(yc)))
02524                 return 0;
02525             }
02526           }
02527           if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
02528               (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
02529             return 1;
02530           return 0;
02531         }
02532         break;
02533 
02534       case NT_STR:
02535         goto swap;
02536         break;
02537 
02538       default:
02539         break;
02540       }
02541     }
02542     break;
02543 
02544   case NT_STR:
02545     {
02546       StrNode* xs = NSTR(x);
02547       if (NSTRING_LEN(x) == 0)
02548         break;
02549 
02550       c = *(xs->s);
02551       switch (ytype) {
02552       case NT_CTYPE:
02553         switch (NCTYPE(y)->ctype) {
02554         case ONIGENC_CTYPE_WORD:
02555           if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
02556             return NCTYPE(y)->not;
02557           else
02558             return !(NCTYPE(y)->not);
02559           break;
02560         default:
02561           break;
02562         }
02563         break;
02564 
02565       case NT_CCLASS:
02566         {
02567           CClassNode* cc = NCCLASS(y);
02568 
02569           code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
02570                                      xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
02571           return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
02572         }
02573         break;
02574 
02575       case NT_STR:
02576         {
02577           UChar *q;
02578           StrNode* ys = NSTR(y);
02579           len = NSTRING_LEN(x);
02580           if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
02581           if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
02582             /* tiny version */
02583             return 0;
02584           }
02585           else {
02586             for (i = 0, p = ys->s, q = xs->s; (OnigDistance)i < len; i++, p++, q++) {
02587               if (*p != *q) return 1;
02588             }
02589           }
02590         }
02591         break;
02592 
02593       default:
02594         break;
02595       }
02596     }
02597     break;
02598 
02599   default:
02600     break;
02601   }
02602 
02603   return 0;
02604 }
02605 
02606 static Node*
02607 get_head_value_node(Node* node, int exact, regex_t* reg)
02608 {
02609   Node* n = NULL_NODE;
02610 
02611   switch (NTYPE(node)) {
02612   case NT_BREF:
02613   case NT_ALT:
02614   case NT_CANY:
02615 #ifdef USE_SUBEXP_CALL
02616   case NT_CALL:
02617 #endif
02618     break;
02619 
02620   case NT_CTYPE:
02621   case NT_CCLASS:
02622     if (exact == 0) {
02623       n = node;
02624     }
02625     break;
02626 
02627   case NT_LIST:
02628     n = get_head_value_node(NCAR(node), exact, reg);
02629     break;
02630 
02631   case NT_STR:
02632     {
02633       StrNode* sn = NSTR(node);
02634 
02635       if (sn->end <= sn->s)
02636         break;
02637 
02638       if (exact != 0 &&
02639           !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
02640       }
02641       else {
02642         n = node;
02643       }
02644     }
02645     break;
02646 
02647   case NT_QTFR:
02648     {
02649       QtfrNode* qn = NQTFR(node);
02650       if (qn->lower > 0) {
02651         if (IS_NOT_NULL(qn->head_exact))
02652           n = qn->head_exact;
02653         else
02654           n = get_head_value_node(qn->target, exact, reg);
02655       }
02656     }
02657     break;
02658 
02659   case NT_ENCLOSE:
02660     {
02661       EncloseNode* en = NENCLOSE(node);
02662       switch (en->type) {
02663       case ENCLOSE_OPTION:
02664         {
02665           OnigOptionType options = reg->options;
02666 
02667           reg->options = NENCLOSE(node)->option;
02668           n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
02669           reg->options = options;
02670         }
02671         break;
02672 
02673       case ENCLOSE_MEMORY:
02674       case ENCLOSE_STOP_BACKTRACK:
02675         n = get_head_value_node(en->target, exact, reg);
02676         break;
02677       }
02678     }
02679     break;
02680 
02681   case NT_ANCHOR:
02682     if (NANCHOR(node)->type == ANCHOR_PREC_READ)
02683       n = get_head_value_node(NANCHOR(node)->target, exact, reg);
02684     break;
02685 
02686   default:
02687     break;
02688   }
02689 
02690   return n;
02691 }
02692 
02693 static int
02694 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
02695 {
02696   int type, r = 0;
02697 
02698   type = NTYPE(node);
02699   if ((NTYPE2BIT(type) & type_mask) == 0)
02700     return 1;
02701 
02702   switch (type) {
02703   case NT_LIST:
02704   case NT_ALT:
02705     do {
02706       r = check_type_tree(NCAR(node), type_mask, enclose_mask,
02707                           anchor_mask);
02708     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02709     break;
02710 
02711   case NT_QTFR:
02712     r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
02713                         anchor_mask);
02714     break;
02715 
02716   case NT_ENCLOSE:
02717     {
02718       EncloseNode* en = NENCLOSE(node);
02719       if ((en->type & enclose_mask) == 0)
02720         return 1;
02721 
02722       r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
02723     }
02724     break;
02725 
02726   case NT_ANCHOR:
02727     type = NANCHOR(node)->type;
02728     if ((type & anchor_mask) == 0)
02729       return 1;
02730 
02731     if (NANCHOR(node)->target)
02732       r = check_type_tree(NANCHOR(node)->target,
02733                           type_mask, enclose_mask, anchor_mask);
02734     break;
02735 
02736   default:
02737     break;
02738   }
02739   return r;
02740 }
02741 
02742 #ifdef USE_SUBEXP_CALL
02743 
02744 #define RECURSION_EXIST       1
02745 #define RECURSION_INFINITE    2
02746 
02747 static int
02748 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
02749 {
02750   int type;
02751   int r = 0;
02752 
02753   type = NTYPE(node);
02754   switch (type) {
02755   case NT_LIST:
02756     {
02757       Node *x;
02758       OnigDistance min;
02759       int ret;
02760 
02761       x = node;
02762       do {
02763         ret = subexp_inf_recursive_check(NCAR(x), env, head);
02764         if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02765         r |= ret;
02766         if (head) {
02767           ret = get_min_match_length(NCAR(x), &min, env);
02768           if (ret != 0) return ret;
02769           if (min != 0) head = 0;
02770         }
02771       } while (IS_NOT_NULL(x = NCDR(x)));
02772     }
02773     break;
02774 
02775   case NT_ALT:
02776     {
02777       int ret;
02778       r = RECURSION_EXIST;
02779       do {
02780         ret = subexp_inf_recursive_check(NCAR(node), env, head);
02781         if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02782         r &= ret;
02783       } while (IS_NOT_NULL(node = NCDR(node)));
02784     }
02785     break;
02786 
02787   case NT_QTFR:
02788     r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
02789     if (r == RECURSION_EXIST) {
02790       if (NQTFR(node)->lower == 0) r = 0;
02791     }
02792     break;
02793 
02794   case NT_ANCHOR:
02795     {
02796       AnchorNode* an = NANCHOR(node);
02797       switch (an->type) {
02798       case ANCHOR_PREC_READ:
02799       case ANCHOR_PREC_READ_NOT:
02800       case ANCHOR_LOOK_BEHIND:
02801       case ANCHOR_LOOK_BEHIND_NOT:
02802         r = subexp_inf_recursive_check(an->target, env, head);
02803         break;
02804       }
02805     }
02806     break;
02807 
02808   case NT_CALL:
02809     r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
02810     break;
02811 
02812   case NT_ENCLOSE:
02813     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02814       return 0;
02815     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02816       return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
02817     else {
02818       SET_ENCLOSE_STATUS(node, NST_MARK2);
02819       r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
02820       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02821     }
02822     break;
02823 
02824   default:
02825     break;
02826   }
02827 
02828   return r;
02829 }
02830 
02831 static int
02832 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
02833 {
02834   int type;
02835   int r = 0;
02836 
02837   type = NTYPE(node);
02838   switch (type) {
02839   case NT_LIST:
02840   case NT_ALT:
02841     do {
02842       r = subexp_inf_recursive_check_trav(NCAR(node), env);
02843     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02844     break;
02845 
02846   case NT_QTFR:
02847     r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
02848     break;
02849 
02850   case NT_ANCHOR:
02851     {
02852       AnchorNode* an = NANCHOR(node);
02853       switch (an->type) {
02854       case ANCHOR_PREC_READ:
02855       case ANCHOR_PREC_READ_NOT:
02856       case ANCHOR_LOOK_BEHIND:
02857       case ANCHOR_LOOK_BEHIND_NOT:
02858         r = subexp_inf_recursive_check_trav(an->target, env);
02859         break;
02860       }
02861     }
02862     break;
02863 
02864   case NT_ENCLOSE:
02865     {
02866       EncloseNode* en = NENCLOSE(node);
02867 
02868       if (IS_ENCLOSE_RECURSION(en)) {
02869         SET_ENCLOSE_STATUS(node, NST_MARK1);
02870         r = subexp_inf_recursive_check(en->target, env, 1);
02871         if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
02872         CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02873       }
02874       r = subexp_inf_recursive_check_trav(en->target, env);
02875     }
02876 
02877     break;
02878 
02879   default:
02880     break;
02881   }
02882 
02883   return r;
02884 }
02885 
02886 static int
02887 subexp_recursive_check(Node* node)
02888 {
02889   int r = 0;
02890 
02891   switch (NTYPE(node)) {
02892   case NT_LIST:
02893   case NT_ALT:
02894     do {
02895       r |= subexp_recursive_check(NCAR(node));
02896     } while (IS_NOT_NULL(node = NCDR(node)));
02897     break;
02898 
02899   case NT_QTFR:
02900     r = subexp_recursive_check(NQTFR(node)->target);
02901     break;
02902 
02903   case NT_ANCHOR:
02904     {
02905       AnchorNode* an = NANCHOR(node);
02906       switch (an->type) {
02907       case ANCHOR_PREC_READ:
02908       case ANCHOR_PREC_READ_NOT:
02909       case ANCHOR_LOOK_BEHIND:
02910       case ANCHOR_LOOK_BEHIND_NOT:
02911         r = subexp_recursive_check(an->target);
02912         break;
02913       }
02914     }
02915     break;
02916 
02917   case NT_CALL:
02918     r = subexp_recursive_check(NCALL(node)->target);
02919     if (r != 0) SET_CALL_RECURSION(node);
02920     break;
02921 
02922   case NT_ENCLOSE:
02923     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02924       return 0;
02925     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02926       return 1; /* recursion */
02927     else {
02928       SET_ENCLOSE_STATUS(node, NST_MARK2);
02929       r = subexp_recursive_check(NENCLOSE(node)->target);
02930       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02931     }
02932     break;
02933 
02934   default:
02935     break;
02936   }
02937 
02938   return r;
02939 }
02940 
02941 
02942 static int
02943 subexp_recursive_check_trav(Node* node, ScanEnv* env)
02944 {
02945 #define FOUND_CALLED_NODE    1
02946 
02947   int type;
02948   int r = 0;
02949 
02950   type = NTYPE(node);
02951   switch (type) {
02952   case NT_LIST:
02953   case NT_ALT:
02954     {
02955       int ret;
02956       do {
02957         ret = subexp_recursive_check_trav(NCAR(node), env);
02958         if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
02959         else if (ret < 0) return ret;
02960       } while (IS_NOT_NULL(node = NCDR(node)));
02961     }
02962     break;
02963 
02964   case NT_QTFR:
02965     r = subexp_recursive_check_trav(NQTFR(node)->target, env);
02966     if (NQTFR(node)->upper == 0) {
02967       if (r == FOUND_CALLED_NODE)
02968         NQTFR(node)->is_refered = 1;
02969     }
02970     break;
02971 
02972   case NT_ANCHOR:
02973     {
02974       AnchorNode* an = NANCHOR(node);
02975       switch (an->type) {
02976       case ANCHOR_PREC_READ:
02977       case ANCHOR_PREC_READ_NOT:
02978       case ANCHOR_LOOK_BEHIND:
02979       case ANCHOR_LOOK_BEHIND_NOT:
02980         r = subexp_recursive_check_trav(an->target, env);
02981         break;
02982       }
02983     }
02984     break;
02985 
02986   case NT_ENCLOSE:
02987     {
02988       EncloseNode* en = NENCLOSE(node);
02989 
02990       if (! IS_ENCLOSE_RECURSION(en)) {
02991         if (IS_ENCLOSE_CALLED(en)) {
02992           SET_ENCLOSE_STATUS(node, NST_MARK1);
02993           r = subexp_recursive_check(en->target);
02994           if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
02995           CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02996         }
02997       }
02998       r = subexp_recursive_check_trav(en->target, env);
02999       if (IS_ENCLOSE_CALLED(en))
03000         r |= FOUND_CALLED_NODE;
03001     }
03002     break;
03003 
03004   default:
03005     break;
03006   }
03007 
03008   return r;
03009 }
03010 
03011 static int
03012 setup_subexp_call(Node* node, ScanEnv* env)
03013 {
03014   int type;
03015   int r = 0;
03016 
03017   type = NTYPE(node);
03018   switch (type) {
03019   case NT_LIST:
03020     do {
03021       r = setup_subexp_call(NCAR(node), env);
03022     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03023     break;
03024 
03025   case NT_ALT:
03026     do {
03027       r = setup_subexp_call(NCAR(node), env);
03028     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03029     break;
03030 
03031   case NT_QTFR:
03032     r = setup_subexp_call(NQTFR(node)->target, env);
03033     break;
03034   case NT_ENCLOSE:
03035     r = setup_subexp_call(NENCLOSE(node)->target, env);
03036     break;
03037 
03038   case NT_CALL:
03039     {
03040       CallNode* cn = NCALL(node);
03041       Node** nodes = SCANENV_MEM_NODES(env);
03042 
03043       if (cn->group_num != 0) {
03044         int gnum = cn->group_num;
03045 
03046 #ifdef USE_NAMED_GROUP
03047         if (env->num_named > 0 &&
03048             IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
03049             !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
03050           return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
03051         }
03052 #endif
03053         if (gnum > env->num_mem) {
03054           onig_scan_env_set_error_string(env,
03055                  ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
03056           return ONIGERR_UNDEFINED_GROUP_REFERENCE;
03057         }
03058 
03059 #ifdef USE_NAMED_GROUP
03060       set_call_attr:
03061 #endif
03062         cn->target = nodes[cn->group_num];
03063         if (IS_NULL(cn->target)) {
03064           onig_scan_env_set_error_string(env,
03065                  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03066           return ONIGERR_UNDEFINED_NAME_REFERENCE;
03067         }
03068         SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
03069         BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
03070         cn->unset_addr_list = env->unset_addr_list;
03071       }
03072 #ifdef USE_NAMED_GROUP
03073       else {
03074         int *refs;
03075 
03076         int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
03077                                            &refs);
03078         if (n <= 0) {
03079           onig_scan_env_set_error_string(env,
03080                  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03081           return ONIGERR_UNDEFINED_NAME_REFERENCE;
03082         }
03083         else if (n > 1) {
03084           onig_scan_env_set_error_string(env,
03085             ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
03086           return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
03087         }
03088         else {
03089           cn->group_num = refs[0];
03090           goto set_call_attr;
03091         }
03092       }
03093 #endif
03094     }
03095     break;
03096 
03097   case NT_ANCHOR:
03098     {
03099       AnchorNode* an = NANCHOR(node);
03100 
03101       switch (an->type) {
03102       case ANCHOR_PREC_READ:
03103       case ANCHOR_PREC_READ_NOT:
03104       case ANCHOR_LOOK_BEHIND:
03105       case ANCHOR_LOOK_BEHIND_NOT:
03106         r = setup_subexp_call(an->target, env);
03107         break;
03108       }
03109     }
03110     break;
03111 
03112   default:
03113     break;
03114   }
03115 
03116   return r;
03117 }
03118 #endif
03119 
03120 /* divide different length alternatives in look-behind.
03121   (?<=A|B) ==> (?<=A)|(?<=B)
03122   (?<!A|B) ==> (?<!A)(?<!B)
03123 */
03124 static int
03125 divide_look_behind_alternatives(Node* node)
03126 {
03127   Node *head, *np, *insert_node;
03128   AnchorNode* an = NANCHOR(node);
03129   int anc_type = an->type;
03130 
03131   head = an->target;
03132   np = NCAR(head);
03133   swap_node(node, head);
03134   NCAR(node) = head;
03135   NANCHOR(head)->target = np;
03136 
03137   np = node;
03138   while ((np = NCDR(np)) != NULL_NODE) {
03139     insert_node = onig_node_new_anchor(anc_type);
03140     CHECK_NULL_RETURN_MEMERR(insert_node);
03141     NANCHOR(insert_node)->target = NCAR(np);
03142     NCAR(np) = insert_node;
03143   }
03144 
03145   if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
03146     np = node;
03147     do {
03148       SET_NTYPE(np, NT_LIST);  /* alt -> list */
03149     } while ((np = NCDR(np)) != NULL_NODE);
03150   }
03151   return 0;
03152 }
03153 
03154 static int
03155 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
03156 {
03157   int r, len;
03158   AnchorNode* an = NANCHOR(node);
03159 
03160   r = get_char_length_tree(an->target, reg, &len);
03161   if (r == 0)
03162     an->char_len = len;
03163   else if (r == GET_CHAR_LEN_VARLEN)
03164     r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03165   else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
03166     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
03167       r = divide_look_behind_alternatives(node);
03168     else
03169       r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03170   }
03171 
03172   return r;
03173 }
03174 
03175 static int
03176 next_setup(Node* node, Node* next_node, regex_t* reg)
03177 {
03178   int type;
03179 
03180  retry:
03181   type = NTYPE(node);
03182   if (type == NT_QTFR) {
03183     QtfrNode* qn = NQTFR(node);
03184     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
03185 #ifdef USE_QTFR_PEEK_NEXT
03186       Node* n = get_head_value_node(next_node, 1, reg);
03187       /* '\0': for UTF-16BE etc... */
03188       if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
03189         qn->next_head_exact = n;
03190       }
03191 #endif
03192       /* automatic posseivation a*b ==> (?>a*)b */
03193       if (qn->lower <= 1) {
03194         int ttype = NTYPE(qn->target);
03195         if (IS_NODE_TYPE_SIMPLE(ttype)) {
03196           Node *x, *y;
03197           x = get_head_value_node(qn->target, 0, reg);
03198           if (IS_NOT_NULL(x)) {
03199             y = get_head_value_node(next_node,  0, reg);
03200             if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
03201               Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
03202               CHECK_NULL_RETURN_MEMERR(en);
03203               SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
03204               swap_node(node, en);
03205               NENCLOSE(node)->target = en;
03206             }
03207           }
03208         }
03209       }
03210     }
03211   }
03212   else if (type == NT_ENCLOSE) {
03213     EncloseNode* en = NENCLOSE(node);
03214     if (en->type == ENCLOSE_MEMORY) {
03215       node = en->target;
03216       goto retry;
03217     }
03218   }
03219   return 0;
03220 }
03221 
03222 
03223 static int
03224 update_string_node_case_fold(regex_t* reg, Node *node)
03225 {
03226   UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
03227   UChar *sbuf, *ebuf, *sp;
03228   int r, i, len;
03229   OnigDistance sbuf_size;
03230   StrNode* sn = NSTR(node);
03231 
03232   end = sn->end;
03233   sbuf_size = (end - sn->s) * 2;
03234   sbuf = (UChar* )xmalloc(sbuf_size);
03235   CHECK_NULL_RETURN_MEMERR(sbuf);
03236   ebuf = sbuf + sbuf_size;
03237 
03238   sp = sbuf;
03239   p = sn->s;
03240   while (p < end) {
03241     len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
03242     q = buf;
03243     for (i = 0; i < len; i++) {
03244       if (sp >= ebuf) {
03245         sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
03246         CHECK_NULL_RETURN_MEMERR(sbuf);
03247         sp = sbuf + sbuf_size;
03248         sbuf_size *= 2;
03249         ebuf = sbuf + sbuf_size;
03250       }
03251 
03252       *sp++ = buf[i];
03253     }
03254   }
03255 
03256   r = onig_node_str_set(node, sbuf, sp);
03257   if (r != 0) {
03258     xfree(sbuf);
03259     return r;
03260   }
03261 
03262   xfree(sbuf);
03263   return 0;
03264 }
03265 
03266 static int
03267 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
03268                                  regex_t* reg)
03269 {
03270   int r;
03271   Node *node;
03272 
03273   node = onig_node_new_str(s, end);
03274   if (IS_NULL(node)) return ONIGERR_MEMORY;
03275 
03276   r = update_string_node_case_fold(reg, node);
03277   if (r != 0) {
03278     onig_node_free(node);
03279     return r;
03280   }
03281 
03282   NSTRING_SET_AMBIG(node);
03283   NSTRING_SET_DONT_GET_OPT_INFO(node);
03284   *rnode = node;
03285   return 0;
03286 }
03287 
03288 static int
03289 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
03290                             UChar *p, int slen, UChar *end,
03291                             regex_t* reg, Node **rnode)
03292 {
03293   int r, i, j, len, varlen;
03294   Node *anode, *var_anode, *snode, *xnode, *an;
03295   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
03296 
03297   *rnode = var_anode = NULL_NODE;
03298 
03299   varlen = 0;
03300   for (i = 0; i < item_num; i++) {
03301     if (items[i].byte_len != slen) {
03302       varlen = 1;
03303       break;
03304     }
03305   }
03306 
03307   if (varlen != 0) {
03308     *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03309     if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
03310 
03311     xnode = onig_node_new_list(NULL, NULL);
03312     if (IS_NULL(xnode)) goto mem_err;
03313     NCAR(var_anode) = xnode;
03314 
03315     anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03316     if (IS_NULL(anode)) goto mem_err;
03317     NCAR(xnode) = anode;
03318   }
03319   else {
03320     *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03321     if (IS_NULL(anode)) return ONIGERR_MEMORY;
03322   }
03323 
03324   snode = onig_node_new_str(p, p + slen);
03325   if (IS_NULL(snode)) goto mem_err;
03326 
03327   NCAR(anode) = snode;
03328 
03329   for (i = 0; i < item_num; i++) {
03330     snode = onig_node_new_str(NULL, NULL);
03331     if (IS_NULL(snode)) goto mem_err;
03332 
03333     for (j = 0; j < items[i].code_len; j++) {
03334       len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
03335       if (len < 0) {
03336         r = len;
03337         goto mem_err2;
03338       }
03339 
03340       r = onig_node_str_cat(snode, buf, buf + len);
03341       if (r != 0) goto mem_err2;
03342     }
03343 
03344     an = onig_node_new_alt(NULL_NODE, NULL_NODE);
03345     if (IS_NULL(an)) {
03346       goto mem_err2;
03347     }
03348 
03349     if (items[i].byte_len != slen) {
03350       Node *rem;
03351       UChar *q = p + items[i].byte_len;
03352 
03353       if (q < end) {
03354         r = expand_case_fold_make_rem_string(&rem, q, end, reg);
03355         if (r != 0) {
03356           onig_node_free(an);
03357           goto mem_err2;
03358         }
03359 
03360         xnode = onig_node_list_add(NULL_NODE, snode);
03361         if (IS_NULL(xnode)) {
03362           onig_node_free(an);
03363           onig_node_free(rem);
03364           goto mem_err2;
03365         }
03366         if (IS_NULL(onig_node_list_add(xnode, rem))) {
03367           onig_node_free(an);
03368           onig_node_free(xnode);
03369           onig_node_free(rem);
03370           goto mem_err;
03371         }
03372 
03373         NCAR(an) = xnode;
03374       }
03375       else {
03376         NCAR(an) = snode;
03377       }
03378 
03379       NCDR(var_anode) = an;
03380       var_anode = an;
03381     }
03382     else {
03383       NCAR(an)     = snode;
03384       NCDR(anode) = an;
03385       anode = an;
03386     }
03387   }
03388 
03389   return varlen;
03390 
03391  mem_err2:
03392   onig_node_free(snode);
03393 
03394  mem_err:
03395   onig_node_free(*rnode);
03396 
03397   return ONIGERR_MEMORY;
03398 }
03399 
03400 static int
03401 expand_case_fold_string(Node* node, regex_t* reg)
03402 {
03403 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
03404 
03405   int r, n, len, alt_num;
03406   UChar *start, *end, *p;
03407   Node *top_root, *root, *snode, *prev_node;
03408   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
03409   StrNode* sn = NSTR(node);
03410 
03411   if (NSTRING_IS_AMBIG(node)) return 0;
03412 
03413   start = sn->s;
03414   end   = sn->end;
03415   if (start >= end) return 0;
03416 
03417   r = 0;
03418   top_root = root = prev_node = snode = NULL_NODE;
03419   alt_num = 1;
03420   p = start;
03421   while (p < end) {
03422     n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
03423                                            p, end, items);
03424     if (n < 0) {
03425       r = n;
03426       goto err;
03427     }
03428 
03429     len = enclen(reg->enc, p, end);
03430 
03431     if (n == 0) {
03432       if (IS_NULL(snode)) {
03433         if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03434           top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03435           if (IS_NULL(root)) {
03436             onig_node_free(prev_node);
03437             goto mem_err;
03438           }
03439         }
03440 
03441         prev_node = snode = onig_node_new_str(NULL, NULL);
03442         if (IS_NULL(snode)) goto mem_err;
03443         if (IS_NOT_NULL(root)) {
03444           if (IS_NULL(onig_node_list_add(root, snode))) {
03445             onig_node_free(snode);
03446             goto mem_err;
03447           }
03448         }
03449       }
03450 
03451       r = onig_node_str_cat(snode, p, p + len);
03452       if (r != 0) goto err;
03453     }
03454     else {
03455       alt_num *= (n + 1);
03456       if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
03457 
03458       if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03459         top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03460         if (IS_NULL(root)) {
03461           onig_node_free(prev_node);
03462           goto mem_err;
03463         }
03464       }
03465 
03466       r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
03467       if (r < 0) goto mem_err;
03468       if (r == 1) {
03469         if (IS_NULL(root)) {
03470           top_root = prev_node;
03471         }
03472         else {
03473           if (IS_NULL(onig_node_list_add(root, prev_node))) {
03474             onig_node_free(prev_node);
03475             goto mem_err;
03476           }
03477         }
03478 
03479         root = NCAR(prev_node);
03480       }
03481       else { /* r == 0 */
03482         if (IS_NOT_NULL(root)) {
03483           if (IS_NULL(onig_node_list_add(root, prev_node))) {
03484             onig_node_free(prev_node);
03485             goto mem_err;
03486           }
03487         }
03488       }
03489 
03490       snode = NULL_NODE;
03491     }
03492 
03493     p += len;
03494   }
03495 
03496   if (p < end) {
03497     Node *srem;
03498 
03499     r = expand_case_fold_make_rem_string(&srem, p, end, reg);
03500     if (r != 0) goto mem_err;
03501 
03502     if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
03503       top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03504       if (IS_NULL(root)) {
03505         onig_node_free(srem);
03506         onig_node_free(prev_node);
03507         goto mem_err;
03508       }
03509     }
03510 
03511     if (IS_NULL(root)) {
03512       prev_node = srem;
03513     }
03514     else {
03515       if (IS_NULL(onig_node_list_add(root, srem))) {
03516         onig_node_free(srem);
03517         goto mem_err;
03518       }
03519     }
03520   }
03521 
03522   /* ending */
03523   top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
03524   swap_node(node, top_root);
03525   onig_node_free(top_root);
03526   return 0;
03527 
03528  mem_err:
03529   r = ONIGERR_MEMORY;
03530 
03531  err:
03532   onig_node_free(top_root);
03533   return r;
03534 }
03535 
03536 
03537 #ifdef USE_COMBINATION_EXPLOSION_CHECK
03538 
03539 #define CEC_THRES_NUM_BIG_REPEAT         512
03540 #define CEC_INFINITE_NUM          0x7fffffff
03541 
03542 #define CEC_IN_INFINITE_REPEAT    (1<<0)
03543 #define CEC_IN_FINITE_REPEAT      (1<<1)
03544 #define CEC_CONT_BIG_REPEAT       (1<<2)
03545 
03546 static int
03547 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
03548 {
03549   int type;
03550   int r = state;
03551 
03552   type = NTYPE(node);
03553   switch (type) {
03554   case NT_LIST:
03555     {
03556       Node* prev = NULL_NODE;
03557       do {
03558         r = setup_comb_exp_check(NCAR(node), r, env);
03559         prev = NCAR(node);
03560       } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
03561     }
03562     break;
03563 
03564   case NT_ALT:
03565     {
03566       int ret;
03567       do {
03568         ret = setup_comb_exp_check(NCAR(node), state, env);
03569         r |= ret;
03570       } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
03571     }
03572     break;
03573 
03574   case NT_QTFR:
03575     {
03576       int child_state = state;
03577       int add_state = 0;
03578       QtfrNode* qn = NQTFR(node);
03579       Node* target = qn->target;
03580       int var_num;
03581 
03582       if (! IS_REPEAT_INFINITE(qn->upper)) {
03583         if (qn->upper > 1) {
03584           /* {0,1}, {1,1} are allowed */
03585           child_state |= CEC_IN_FINITE_REPEAT;
03586 
03587           /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
03588           if (env->backrefed_mem == 0) {
03589             if (NTYPE(qn->target) == NT_ENCLOSE) {
03590               EncloseNode* en = NENCLOSE(qn->target);
03591               if (en->type == ENCLOSE_MEMORY) {
03592                 if (NTYPE(en->target) == NT_QTFR) {
03593                   QtfrNode* q = NQTFR(en->target);
03594                   if (IS_REPEAT_INFINITE(q->upper)
03595                       && q->greedy == qn->greedy) {
03596                     qn->upper = (qn->lower == 0 ? 1 : qn->lower);
03597                     if (qn->upper == 1)
03598                       child_state = state;
03599                   }
03600                 }
03601               }
03602             }
03603           }
03604         }
03605       }
03606 
03607       if (state & CEC_IN_FINITE_REPEAT) {
03608         qn->comb_exp_check_num = -1;
03609       }
03610       else {
03611         if (IS_REPEAT_INFINITE(qn->upper)) {
03612           var_num = CEC_INFINITE_NUM;
03613           child_state |= CEC_IN_INFINITE_REPEAT;
03614         }
03615         else {
03616           var_num = qn->upper - qn->lower;
03617         }
03618 
03619         if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
03620           add_state |= CEC_CONT_BIG_REPEAT;
03621 
03622         if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
03623             ((state & CEC_CONT_BIG_REPEAT) != 0 &&
03624              var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
03625           if (qn->comb_exp_check_num == 0) {
03626             env->num_comb_exp_check++;
03627             qn->comb_exp_check_num = env->num_comb_exp_check;
03628             if (env->curr_max_regnum > env->comb_exp_max_regnum)
03629               env->comb_exp_max_regnum = env->curr_max_regnum;
03630           }
03631         }
03632       }
03633 
03634       r = setup_comb_exp_check(target, child_state, env);
03635       r |= add_state;
03636     }
03637     break;
03638 
03639   case NT_ENCLOSE:
03640     {
03641       EncloseNode* en = NENCLOSE(node);
03642 
03643       switch (en->type) {
03644       case ENCLOSE_MEMORY:
03645         {
03646           if (env->curr_max_regnum < en->regnum)
03647             env->curr_max_regnum = en->regnum;
03648 
03649           r = setup_comb_exp_check(en->target, state, env);
03650         }
03651         break;
03652 
03653       default:
03654         r = setup_comb_exp_check(en->target, state, env);
03655         break;
03656       }
03657     }
03658     break;
03659 
03660 #ifdef USE_SUBEXP_CALL
03661   case NT_CALL:
03662     if (IS_CALL_RECURSION(NCALL(node)))
03663       env->has_recursion = 1;
03664     else
03665       r = setup_comb_exp_check(NCALL(node)->target, state, env);
03666     break;
03667 #endif
03668 
03669   default:
03670     break;
03671   }
03672 
03673   return r;
03674 }
03675 #endif
03676 
03677 #define IN_ALT        (1<<0)
03678 #define IN_NOT        (1<<1)
03679 #define IN_REPEAT     (1<<2)
03680 #define IN_VAR_REPEAT (1<<3)
03681 
03682 /* setup_tree does the following work.
03683  1. check empty loop. (set qn->target_empty_info)
03684  2. expand ignore-case in char class.
03685  3. set memory status bit flags. (reg->mem_stats)
03686  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
03687  5. find invalid patterns in look-behind.
03688  6. expand repeated string.
03689  */
03690 static int
03691 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
03692 {
03693   int type;
03694   int r = 0;
03695 
03696 restart:
03697   type = NTYPE(node);
03698   switch (type) {
03699   case NT_LIST:
03700     {
03701       Node* prev = NULL_NODE;
03702       do {
03703         r = setup_tree(NCAR(node), reg, state, env);
03704         if (IS_NOT_NULL(prev) && r == 0) {
03705           r = next_setup(prev, NCAR(node), reg);
03706         }
03707         prev = NCAR(node);
03708       } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03709     }
03710     break;
03711 
03712   case NT_ALT:
03713     do {
03714       r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
03715     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03716     break;
03717 
03718   case NT_CCLASS:
03719     break;
03720 
03721   case NT_STR:
03722     if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
03723       r = expand_case_fold_string(node, reg);
03724     }
03725     break;
03726 
03727   case NT_CTYPE:
03728   case NT_CANY:
03729     break;
03730 
03731 #ifdef USE_SUBEXP_CALL
03732   case NT_CALL:
03733     break;
03734 #endif
03735 
03736   case NT_BREF:
03737     {
03738       int i;
03739       int* p;
03740       Node** nodes = SCANENV_MEM_NODES(env);
03741       BRefNode* br = NBREF(node);
03742       p = BACKREFS_P(br);
03743       for (i = 0; i < br->back_num; i++) {
03744         if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
03745         BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
03746         BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
03747 #ifdef USE_BACKREF_WITH_LEVEL
03748         if (IS_BACKREF_NEST_LEVEL(br)) {
03749           BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
03750         }
03751 #endif
03752         SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
03753       }
03754     }
03755     break;
03756 
03757   case NT_QTFR:
03758     {
03759       OnigDistance d;
03760       QtfrNode* qn = NQTFR(node);
03761       Node* target = qn->target;
03762 
03763       if ((state & IN_REPEAT) != 0) {
03764         qn->state |= NST_IN_REPEAT;
03765       }
03766 
03767       if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
03768         r = get_min_match_length(target, &d, env);
03769         if (r) break;
03770         if (d == 0) {
03771           qn->target_empty_info = NQ_TARGET_IS_EMPTY;
03772 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
03773           r = quantifiers_memory_node_info(target);
03774           if (r < 0) break;
03775           if (r > 0) {
03776             qn->target_empty_info = r;
03777           }
03778 #endif
03779 #if 0
03780           r = get_max_match_length(target, &d, env);
03781           if (r == 0 && d == 0) {
03782             /*  ()* ==> ()?, ()+ ==> ()  */
03783             qn->upper = 1;
03784             if (qn->lower > 1) qn->lower = 1;
03785             if (NTYPE(target) == NT_STR) {
03786               qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
03787             }
03788           }
03789 #endif
03790         }
03791       }
03792 
03793       state |= IN_REPEAT;
03794       if (qn->lower != qn->upper)
03795         state |= IN_VAR_REPEAT;
03796       r = setup_tree(target, reg, state, env);
03797       if (r) break;
03798 
03799       /* expand string */
03800 #define EXPAND_STRING_MAX_LENGTH  100
03801       if (NTYPE(target) == NT_STR) {
03802         if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
03803             qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03804           OnigDistance len = NSTRING_LEN(target);
03805           StrNode* sn = NSTR(target);
03806 
03807           if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03808             int i, n = qn->lower;
03809             onig_node_conv_to_str_node(node, NSTR(target)->flag);
03810             for (i = 0; i < n; i++) {
03811               r = onig_node_str_cat(node, sn->s, sn->end);
03812               if (r) break;
03813             }
03814             onig_node_free(target);
03815             break; /* break case NT_QTFR: */
03816           }
03817         }
03818       }
03819 
03820 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
03821       if (qn->greedy && (qn->target_empty_info != 0)) {
03822         if (NTYPE(target) == NT_QTFR) {
03823           QtfrNode* tqn = NQTFR(target);
03824           if (IS_NOT_NULL(tqn->head_exact)) {
03825             qn->head_exact  = tqn->head_exact;
03826             tqn->head_exact = NULL;
03827           }
03828         }
03829         else {
03830           qn->head_exact = get_head_value_node(qn->target, 1, reg);
03831         }
03832       }
03833 #endif
03834     }
03835     break;
03836 
03837   case NT_ENCLOSE:
03838     {
03839       EncloseNode* en = NENCLOSE(node);
03840 
03841       switch (en->type) {
03842       case ENCLOSE_OPTION:
03843         {
03844           OnigOptionType options = reg->options;
03845           reg->options = NENCLOSE(node)->option;
03846           r = setup_tree(NENCLOSE(node)->target, reg, state, env);
03847           reg->options = options;
03848         }
03849         break;
03850 
03851       case ENCLOSE_MEMORY:
03852         if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
03853           BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
03854           /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
03855         }
03856         r = setup_tree(en->target, reg, state, env);
03857         break;
03858 
03859       case ENCLOSE_STOP_BACKTRACK:
03860         {
03861           Node* target = en->target;
03862           r = setup_tree(target, reg, state, env);
03863           if (NTYPE(target) == NT_QTFR) {
03864             QtfrNode* tqn = NQTFR(target);
03865             if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
03866                 tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
03867               int qtype = NTYPE(tqn->target);
03868               if (IS_NODE_TYPE_SIMPLE(qtype))
03869                 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
03870             }
03871           }
03872         }
03873         break;
03874       }
03875     }
03876     break;
03877 
03878   case NT_ANCHOR:
03879     {
03880       AnchorNode* an = NANCHOR(node);
03881 
03882       switch (an->type) {
03883       case ANCHOR_PREC_READ:
03884         r = setup_tree(an->target, reg, state, env);
03885         break;
03886       case ANCHOR_PREC_READ_NOT:
03887         r = setup_tree(an->target, reg, (state | IN_NOT), env);
03888         break;
03889 
03890 /* allowed node types in look-behind */
03891 #define ALLOWED_TYPE_IN_LB  \
03892   ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
03893     BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
03894 
03895 #define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY )
03896 #define ALLOWED_ENCLOSE_IN_LB_NOT   0
03897 
03898 #define ALLOWED_ANCHOR_IN_LB \
03899 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03900 #define ALLOWED_ANCHOR_IN_LB_NOT \
03901 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03902 
03903       case ANCHOR_LOOK_BEHIND:
03904         {
03905           r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03906                               ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
03907           if (r < 0) return r;
03908           if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03909           r = setup_look_behind(node, reg, env);
03910           if (r != 0) return r;
03911           if (NTYPE(node) != NT_ANCHOR) goto restart;
03912           r = setup_tree(an->target, reg, state, env);
03913         }
03914         break;
03915 
03916       case ANCHOR_LOOK_BEHIND_NOT:
03917         {
03918           r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03919                       ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
03920           if (r < 0) return r;
03921           if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03922           r = setup_look_behind(node, reg, env);
03923           if (r != 0) return r;
03924           if (NTYPE(node) != NT_ANCHOR) goto restart;
03925           r = setup_tree(an->target, reg, (state | IN_NOT), env);
03926         }
03927         break;
03928       }
03929     }
03930     break;
03931 
03932   default:
03933     break;
03934   }
03935 
03936   return r;
03937 }
03938 
03939 /* set skip map for Boyer-Moor search */
03940 static int
03941 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
03942             UChar skip[], int** int_skip)
03943 {
03944   OnigDistance i, len;
03945 
03946   len = end - s;
03947   if (len < ONIG_CHAR_TABLE_SIZE) {
03948     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
03949 
03950     for (i = 0; i < len - 1; i++)
03951       skip[s[i]] = len - 1 - i;
03952   }
03953   else {
03954     if (IS_NULL(*int_skip)) {
03955       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
03956       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
03957     }
03958     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int)len;
03959 
03960     for (i = 0; i < len - 1; i++)
03961       (*int_skip)[s[i]] = (int)(len - 1 - i);
03962   }
03963   return 0;
03964 }
03965 
03966 #define OPT_EXACT_MAXLEN   24
03967 
03968 typedef struct {
03969   OnigDistance min;  /* min byte length */
03970   OnigDistance max;  /* max byte length */
03971 } MinMaxLen;
03972 
03973 typedef struct {
03974   MinMaxLen        mmd;
03975   OnigEncoding     enc;
03976   OnigOptionType   options;
03977   OnigCaseFoldType case_fold_flag;
03978   ScanEnv*         scan_env;
03979 } OptEnv;
03980 
03981 typedef struct {
03982   int left_anchor;
03983   int right_anchor;
03984 } OptAncInfo;
03985 
03986 typedef struct {
03987   MinMaxLen  mmd; /* info position */
03988   OptAncInfo anc;
03989 
03990   int   reach_end;
03991   int   ignore_case;
03992   int   len;
03993   UChar s[OPT_EXACT_MAXLEN];
03994 } OptExactInfo;
03995 
03996 typedef struct {
03997   MinMaxLen mmd; /* info position */
03998   OptAncInfo anc;
03999 
04000   int   value;      /* weighted value */
04001   UChar map[ONIG_CHAR_TABLE_SIZE];
04002 } OptMapInfo;
04003 
04004 typedef struct {
04005   MinMaxLen    len;
04006 
04007   OptAncInfo   anc;
04008   OptExactInfo exb;    /* boundary */
04009   OptExactInfo exm;    /* middle */
04010   OptExactInfo expr;   /* prec read (?=...) */
04011 
04012   OptMapInfo   map;   /* boundary */
04013 } NodeOptInfo;
04014 
04015 
04016 static int
04017 map_position_value(OnigEncoding enc, int i)
04018 {
04019   static const short int ByteValTable[] = {
04020      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
04021      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
04022     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
04023      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
04024      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
04025      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
04026      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
04027      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
04028   };
04029 
04030   if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
04031     if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
04032       return 20;
04033     else
04034       return (int )ByteValTable[i];
04035   }
04036   else
04037     return 4;   /* Take it easy. */
04038 }
04039 
04040 static int
04041 distance_value(MinMaxLen* mm)
04042 {
04043   /* 1000 / (min-max-dist + 1) */
04044   static const short int dist_vals[] = {
04045     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
04046       91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
04047       48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
04048       32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
04049       24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
04050       20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
04051       16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
04052       14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
04053       12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
04054       11,   11,   11,   11,   11,   10,   10,   10,   10,   10
04055   };
04056 
04057   OnigDistance d;
04058 
04059   if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
04060 
04061   d = mm->max - mm->min;
04062   if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
04063     /* return dist_vals[d] * 16 / (mm->min + 12); */
04064     return (int )dist_vals[d];
04065   else
04066     return 1;
04067 }
04068 
04069 static int
04070 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
04071 {
04072   if (v2 <= 0) return -1;
04073   if (v1 <= 0) return  1;
04074 
04075   v1 *= distance_value(d1);
04076   v2 *= distance_value(d2);
04077 
04078   if (v2 > v1) return  1;
04079   if (v2 < v1) return -1;
04080 
04081   if (d2->min < d1->min) return  1;
04082   if (d2->min > d1->min) return -1;
04083   return 0;
04084 }
04085 
04086 static int
04087 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
04088 {
04089   return (a->min == b->min && a->max == b->max) ? 1 : 0;
04090 }
04091 
04092 
04093 static void
04094 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
04095 {
04096   mml->min = min;
04097   mml->max = max;
04098 }
04099 
04100 static void
04101 clear_mml(MinMaxLen* mml)
04102 {
04103   mml->min = mml->max = 0;
04104 }
04105 
04106 static void
04107 copy_mml(MinMaxLen* to, MinMaxLen* from)
04108 {
04109   to->min = from->min;
04110   to->max = from->max;
04111 }
04112 
04113 static void
04114 add_mml(MinMaxLen* to, MinMaxLen* from)
04115 {
04116   to->min = distance_add(to->min, from->min);
04117   to->max = distance_add(to->max, from->max);
04118 }
04119 
04120 #if 0
04121 static void
04122 add_len_mml(MinMaxLen* to, OnigDistance len)
04123 {
04124   to->min = distance_add(to->min, len);
04125   to->max = distance_add(to->max, len);
04126 }
04127 #endif
04128 
04129 static void
04130 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
04131 {
04132   if (to->min > from->min) to->min = from->min;
04133   if (to->max < from->max) to->max = from->max;
04134 }
04135 
04136 static void
04137 copy_opt_env(OptEnv* to, OptEnv* from)
04138 {
04139   *to = *from;
04140 }
04141 
04142 static void
04143 clear_opt_anc_info(OptAncInfo* anc)
04144 {
04145   anc->left_anchor  = 0;
04146   anc->right_anchor = 0;
04147 }
04148 
04149 static void
04150 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
04151 {
04152   *to = *from;
04153 }
04154 
04155 static void
04156 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
04157                     OnigDistance left_len, OnigDistance right_len)
04158 {
04159   clear_opt_anc_info(to);
04160 
04161   to->left_anchor = left->left_anchor;
04162   if (left_len == 0) {
04163     to->left_anchor |= right->left_anchor;
04164   }
04165 
04166   to->right_anchor = right->right_anchor;
04167   if (right_len == 0) {
04168     to->right_anchor |= left->right_anchor;
04169   }
04170 }
04171 
04172 static int
04173 is_left_anchor(int anc)
04174 {
04175   if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
04176       anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
04177       anc == ANCHOR_PREC_READ_NOT)
04178     return 0;
04179 
04180   return 1;
04181 }
04182 
04183 static int
04184 is_set_opt_anc_info(OptAncInfo* to, int anc)
04185 {
04186   if ((to->left_anchor & anc) != 0) return 1;
04187 
04188   return ((to->right_anchor & anc) != 0 ? 1 : 0);
04189 }
04190 
04191 static void
04192 add_opt_anc_info(OptAncInfo* to, int anc)
04193 {
04194   if (is_left_anchor(anc))
04195     to->left_anchor |= anc;
04196   else
04197     to->right_anchor |= anc;
04198 }
04199 
04200 static void
04201 remove_opt_anc_info(OptAncInfo* to, int anc)
04202 {
04203   if (is_left_anchor(anc))
04204     to->left_anchor &= ~anc;
04205   else
04206     to->right_anchor &= ~anc;
04207 }
04208 
04209 static void
04210 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
04211 {
04212   to->left_anchor  &= add->left_anchor;
04213   to->right_anchor &= add->right_anchor;
04214 }
04215 
04216 static int
04217 is_full_opt_exact_info(OptExactInfo* ex)
04218 {
04219   return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
04220 }
04221 
04222 static void
04223 clear_opt_exact_info(OptExactInfo* ex)
04224 {
04225   clear_mml(&ex->mmd);
04226   clear_opt_anc_info(&ex->anc);
04227   ex->reach_end   = 0;
04228   ex->ignore_case = 0;
04229   ex->len         = 0;
04230   ex->s[0]        = '\0';
04231 }
04232 
04233 static void
04234 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
04235 {
04236   *to = *from;
04237 }
04238 
04239 static void
04240 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
04241 {
04242   int i, j, len;
04243   UChar *p, *end;
04244   OptAncInfo tanc;
04245 
04246   if (! to->ignore_case && add->ignore_case) {
04247     if (to->len >= add->len) return ;  /* avoid */
04248 
04249     to->ignore_case = 1;
04250   }
04251 
04252   p = add->s;
04253   end = p + add->len;
04254   for (i = to->len; p < end; ) {
04255     len = enclen(enc, p, end);
04256     if (i + len > OPT_EXACT_MAXLEN) break;
04257     for (j = 0; j < len && p < end; j++)
04258       to->s[i++] = *p++;
04259   }
04260 
04261   to->len = i;
04262   to->reach_end = (p == end ? add->reach_end : 0);
04263 
04264   concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
04265   if (! to->reach_end) tanc.right_anchor = 0;
04266   copy_opt_anc_info(&to->anc, &tanc);
04267 }
04268 
04269 static void
04270 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
04271                           int raw ARG_UNUSED, OnigEncoding enc)
04272 {
04273   int i, j, len;
04274   UChar *p;
04275 
04276   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
04277     len = enclen(enc, p, end);
04278     if (i + len > OPT_EXACT_MAXLEN) break;
04279     for (j = 0; j < len && p < end; j++)
04280       to->s[i++] = *p++;
04281   }
04282 
04283   to->len = i;
04284 }
04285 
04286 static void
04287 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
04288 {
04289   int i, j, len;
04290 
04291   if (add->len == 0 || to->len == 0) {
04292     clear_opt_exact_info(to);
04293     return ;
04294   }
04295 
04296   if (! is_equal_mml(&to->mmd, &add->mmd)) {
04297     clear_opt_exact_info(to);
04298     return ;
04299   }
04300 
04301   for (i = 0; i < to->len && i < add->len; ) {
04302     if (to->s[i] != add->s[i]) break;
04303     len = enclen(env->enc, to->s + i, to->s + to->len);
04304 
04305     for (j = 1; j < len; j++) {
04306       if (to->s[i+j] != add->s[i+j]) break;
04307     }
04308     if (j < len) break;
04309     i += len;
04310   }
04311 
04312   if (! add->reach_end || i < add->len || i < to->len) {
04313     to->reach_end = 0;
04314   }
04315   to->len = i;
04316   to->ignore_case |= add->ignore_case;
04317 
04318   alt_merge_opt_anc_info(&to->anc, &add->anc);
04319   if (! to->reach_end) to->anc.right_anchor = 0;
04320 }
04321 
04322 static void
04323 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
04324 {
04325   int v1, v2;
04326 
04327   v1 = now->len;
04328   v2 = alt->len;
04329 
04330   if (v2 == 0) {
04331     return ;
04332   }
04333   else if (v1 == 0) {
04334     copy_opt_exact_info(now, alt);
04335     return ;
04336   }
04337   else if (v1 <= 2 && v2 <= 2) {
04338     /* ByteValTable[x] is big value --> low price */
04339     v2 = map_position_value(enc, now->s[0]);
04340     v1 = map_position_value(enc, alt->s[0]);
04341 
04342     if (now->len > 1) v1 += 5;
04343     if (alt->len > 1) v2 += 5;
04344   }
04345 
04346   if (now->ignore_case == 0) v1 *= 2;
04347   if (alt->ignore_case == 0) v2 *= 2;
04348 
04349   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04350     copy_opt_exact_info(now, alt);
04351 }
04352 
04353 static void
04354 clear_opt_map_info(OptMapInfo* map)
04355 {
04356   static const OptMapInfo clean_info = {
04357     {0, 0}, {0, 0}, 0,
04358     {
04359       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04360       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04361       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04362       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04363       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04364       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04365       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04366       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04367       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04368       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04369       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04370       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04371       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04372       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04373       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04374       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
04375     }
04376   };
04377 
04378   xmemcpy(map, &clean_info, sizeof(OptMapInfo));
04379 }
04380 
04381 static void
04382 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
04383 {
04384   *to = *from;
04385 }
04386 
04387 static void
04388 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
04389 {
04390   if (map->map[c] == 0) {
04391     map->map[c] = 1;
04392     map->value += map_position_value(enc, c);
04393   }
04394 }
04395 
04396 static int
04397 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
04398                           OnigEncoding enc, OnigCaseFoldType case_fold_flag)
04399 {
04400   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04401   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04402   int i, n;
04403 
04404   add_char_opt_map_info(map, p[0], enc);
04405 
04406   case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
04407   n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
04408   if (n < 0) return n;
04409 
04410   for (i = 0; i < n; i++) {
04411     ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
04412     add_char_opt_map_info(map, buf[0], enc);
04413   }
04414 
04415   return 0;
04416 }
04417 
04418 static void
04419 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
04420 {
04421   const int z = 1<<15; /* 32768: something big value */
04422 
04423   int v1, v2;
04424 
04425   if (alt->value == 0) return ;
04426   if (now->value == 0) {
04427     copy_opt_map_info(now, alt);
04428     return ;
04429   }
04430 
04431   v1 = z / now->value;
04432   v2 = z / alt->value;
04433   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04434     copy_opt_map_info(now, alt);
04435 }
04436 
04437 static int
04438 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
04439 {
04440 #define COMP_EM_BASE  20
04441   int ve, vm;
04442 
04443   if (m->value <= 0) return -1;
04444 
04445   ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
04446   vm = COMP_EM_BASE * 5 * 2 / m->value;
04447   return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
04448 }
04449 
04450 static void
04451 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
04452 {
04453   int i, val;
04454 
04455   /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
04456   if (to->value == 0) return ;
04457   if (add->value == 0 || to->mmd.max < add->mmd.min) {
04458     clear_opt_map_info(to);
04459     return ;
04460   }
04461 
04462   alt_merge_mml(&to->mmd, &add->mmd);
04463 
04464   val = 0;
04465   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
04466     if (add->map[i])
04467       to->map[i] = 1;
04468 
04469     if (to->map[i])
04470       val += map_position_value(enc, i);
04471   }
04472   to->value = val;
04473 
04474   alt_merge_opt_anc_info(&to->anc, &add->anc);
04475 }
04476 
04477 static void
04478 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
04479 {
04480   copy_mml(&(opt->exb.mmd),  mmd);
04481   copy_mml(&(opt->expr.mmd), mmd);
04482   copy_mml(&(opt->map.mmd),  mmd);
04483 }
04484 
04485 static void
04486 clear_node_opt_info(NodeOptInfo* opt)
04487 {
04488   clear_mml(&opt->len);
04489   clear_opt_anc_info(&opt->anc);
04490   clear_opt_exact_info(&opt->exb);
04491   clear_opt_exact_info(&opt->exm);
04492   clear_opt_exact_info(&opt->expr);
04493   clear_opt_map_info(&opt->map);
04494 }
04495 
04496 static void
04497 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
04498 {
04499   *to = *from;
04500 }
04501 
04502 static void
04503 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
04504 {
04505   int exb_reach, exm_reach;
04506   OptAncInfo tanc;
04507 
04508   concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
04509   copy_opt_anc_info(&to->anc, &tanc);
04510 
04511   if (add->exb.len > 0 && to->len.max == 0) {
04512     concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
04513                         to->len.max, add->len.max);
04514     copy_opt_anc_info(&add->exb.anc, &tanc);
04515   }
04516 
04517   if (add->map.value > 0 && to->len.max == 0) {
04518     if (add->map.mmd.max == 0)
04519       add->map.anc.left_anchor |= to->anc.left_anchor;
04520   }
04521 
04522   exb_reach = to->exb.reach_end;
04523   exm_reach = to->exm.reach_end;
04524 
04525   if (add->len.max != 0)
04526     to->exb.reach_end = to->exm.reach_end = 0;
04527 
04528   if (add->exb.len > 0) {
04529     if (exb_reach) {
04530       concat_opt_exact_info(&to->exb, &add->exb, enc);
04531       clear_opt_exact_info(&add->exb);
04532     }
04533     else if (exm_reach) {
04534       concat_opt_exact_info(&to->exm, &add->exb, enc);
04535       clear_opt_exact_info(&add->exb);
04536     }
04537   }
04538   select_opt_exact_info(enc, &to->exm, &add->exb);
04539   select_opt_exact_info(enc, &to->exm, &add->exm);
04540 
04541   if (to->expr.len > 0) {
04542     if (add->len.max > 0) {
04543       if (to->expr.len > (int )add->len.max)
04544         to->expr.len = (int)add->len.max;
04545 
04546       if (to->expr.mmd.max == 0)
04547         select_opt_exact_info(enc, &to->exb, &to->expr);
04548       else
04549         select_opt_exact_info(enc, &to->exm, &to->expr);
04550     }
04551   }
04552   else if (add->expr.len > 0) {
04553     copy_opt_exact_info(&to->expr, &add->expr);
04554   }
04555 
04556   select_opt_map_info(&to->map, &add->map);
04557 
04558   add_mml(&to->len, &add->len);
04559 }
04560 
04561 static void
04562 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
04563 {
04564   alt_merge_opt_anc_info  (&to->anc,  &add->anc);
04565   alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
04566   alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
04567   alt_merge_opt_exact_info(&to->expr, &add->expr, env);
04568   alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
04569 
04570   alt_merge_mml(&to->len, &add->len);
04571 }
04572 
04573 
04574 #define MAX_NODE_OPT_INFO_REF_COUNT    5
04575 
04576 static int
04577 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
04578 {
04579   int type;
04580   int r = 0;
04581 
04582   clear_node_opt_info(opt);
04583   set_bound_node_opt_info(opt, &env->mmd);
04584 
04585   type = NTYPE(node);
04586   switch (type) {
04587   case NT_LIST:
04588     {
04589       OptEnv nenv;
04590       NodeOptInfo nopt;
04591       Node* nd = node;
04592 
04593       copy_opt_env(&nenv, env);
04594       do {
04595         r = optimize_node_left(NCAR(nd), &nopt, &nenv);
04596         if (r == 0) {
04597           add_mml(&nenv.mmd, &nopt.len);
04598           concat_left_node_opt_info(env->enc, opt, &nopt);
04599         }
04600       } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
04601     }
04602     break;
04603 
04604   case NT_ALT:
04605     {
04606       NodeOptInfo nopt;
04607       Node* nd = node;
04608 
04609       do {
04610         r = optimize_node_left(NCAR(nd), &nopt, env);
04611         if (r == 0) {
04612           if (nd == node) copy_node_opt_info(opt, &nopt);
04613           else            alt_merge_node_opt_info(opt, &nopt, env);
04614         }
04615       } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
04616     }
04617     break;
04618 
04619   case NT_STR:
04620     {
04621       StrNode* sn = NSTR(node);
04622       OnigDistance slen = sn->end - sn->s;
04623       int is_raw = NSTRING_IS_RAW(node);
04624 
04625       if (! NSTRING_IS_AMBIG(node)) {
04626         concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04627                                   NSTRING_IS_RAW(node), env->enc);
04628         if (slen > 0) {
04629           add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
04630         }
04631         set_mml(&opt->len, slen, slen);
04632       }
04633       else {
04634         OnigDistance max;
04635 
04636         if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
04637           int n = onigenc_strlen(env->enc, sn->s, sn->end);
04638           max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
04639         }
04640         else {
04641           concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04642                                     is_raw, env->enc);
04643           opt->exb.ignore_case = 1;
04644 
04645           if (slen > 0) {
04646             r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
04647                                           env->enc, env->case_fold_flag);
04648             if (r != 0) break;
04649           }
04650 
04651           max = slen;
04652         }
04653 
04654         set_mml(&opt->len, slen, max);
04655       }
04656 
04657       if ((OnigDistance)opt->exb.len == slen)
04658         opt->exb.reach_end = 1;
04659     }
04660     break;
04661 
04662   case NT_CCLASS:
04663     {
04664       int i, z;
04665       CClassNode* cc = NCCLASS(node);
04666 
04667       /* no need to check ignore case. (setted in setup_tree()) */
04668 
04669       if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
04670         OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04671         OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04672 
04673         set_mml(&opt->len, min, max);
04674       }
04675       else {
04676         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04677           z = BITSET_AT(cc->bs, i);
04678           if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
04679             add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04680           }
04681         }
04682         set_mml(&opt->len, 1, 1);
04683       }
04684     }
04685     break;
04686 
04687   case NT_CTYPE:
04688     {
04689       int i, min, max;
04690 
04691       max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04692 
04693       if (max == 1) {
04694         min = 1;
04695 
04696         switch (NCTYPE(node)->ctype) {
04697         case ONIGENC_CTYPE_WORD:
04698           if (NCTYPE(node)->not != 0) {
04699             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04700               if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
04701                 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04702               }
04703             }
04704           }
04705           else {
04706             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04707               if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
04708                 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04709               }
04710             }
04711           }
04712           break;
04713         }
04714       }
04715       else {
04716         min = ONIGENC_MBC_MINLEN(env->enc);
04717       }
04718       set_mml(&opt->len, min, max);
04719     }
04720     break;
04721 
04722   case NT_CANY:
04723     {
04724       OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04725       OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04726       set_mml(&opt->len, min, max);
04727     }
04728     break;
04729 
04730   case NT_ANCHOR:
04731     switch (NANCHOR(node)->type) {
04732     case ANCHOR_BEGIN_BUF:
04733     case ANCHOR_BEGIN_POSITION:
04734     case ANCHOR_BEGIN_LINE:
04735     case ANCHOR_END_BUF:
04736     case ANCHOR_SEMI_END_BUF:
04737     case ANCHOR_END_LINE:
04738     case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
04739       add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
04740       break;
04741 
04742     case ANCHOR_PREC_READ:
04743       {
04744         NodeOptInfo nopt;
04745 
04746         r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
04747         if (r == 0) {
04748           if (nopt.exb.len > 0)
04749             copy_opt_exact_info(&opt->expr, &nopt.exb);
04750           else if (nopt.exm.len > 0)
04751             copy_opt_exact_info(&opt->expr, &nopt.exm);
04752 
04753           opt->expr.reach_end = 0;
04754 
04755           if (nopt.map.value > 0)
04756             copy_opt_map_info(&opt->map, &nopt.map);
04757         }
04758       }
04759       break;
04760 
04761     case ANCHOR_PREC_READ_NOT:
04762     case ANCHOR_LOOK_BEHIND_NOT:
04763       break;
04764     }
04765     break;
04766 
04767   case NT_BREF:
04768     {
04769       int i;
04770       int* backs;
04771       OnigDistance min, max, tmin, tmax;
04772       Node** nodes = SCANENV_MEM_NODES(env->scan_env);
04773       BRefNode* br = NBREF(node);
04774 
04775       if (br->state & NST_RECURSION) {
04776         set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04777         break;
04778       }
04779       backs = BACKREFS_P(br);
04780       r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
04781       if (r != 0) break;
04782       r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
04783       if (r != 0) break;
04784       for (i = 1; i < br->back_num; i++) {
04785         r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
04786         if (r != 0) break;
04787         r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
04788         if (r != 0) break;
04789         if (min > tmin) min = tmin;
04790         if (max < tmax) max = tmax;
04791       }
04792       if (r == 0) set_mml(&opt->len, min, max);
04793     }
04794     break;
04795 
04796 #ifdef USE_SUBEXP_CALL
04797   case NT_CALL:
04798     if (IS_CALL_RECURSION(NCALL(node)))
04799       set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04800     else {
04801       OnigOptionType save = env->options;
04802       env->options = NENCLOSE(NCALL(node)->target)->option;
04803       r = optimize_node_left(NCALL(node)->target, opt, env);
04804       env->options = save;
04805     }
04806     break;
04807 #endif
04808 
04809   case NT_QTFR:
04810     {
04811       int i;
04812       OnigDistance min, max;
04813       NodeOptInfo nopt;
04814       QtfrNode* qn = NQTFR(node);
04815 
04816       r = optimize_node_left(qn->target, &nopt, env);
04817       if (r) break;
04818 
04819       if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
04820         if (env->mmd.max == 0 &&
04821             NTYPE(qn->target) == NT_CANY && qn->greedy) {
04822           if (IS_MULTILINE(env->options))
04823             add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
04824           else
04825             add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
04826         }
04827       }
04828       else {
04829         if (qn->lower > 0) {
04830           copy_node_opt_info(opt, &nopt);
04831           if (nopt.exb.len > 0) {
04832             if (nopt.exb.reach_end) {
04833               for (i = 2; i <= qn->lower &&
04834                           ! is_full_opt_exact_info(&opt->exb); i++) {
04835                 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
04836               }
04837               if (i < qn->lower) {
04838                 opt->exb.reach_end = 0;
04839               }
04840             }
04841           }
04842 
04843           if (qn->lower != qn->upper) {
04844             opt->exb.reach_end = 0;
04845             opt->exm.reach_end = 0;
04846           }
04847           if (qn->lower > 1)
04848             opt->exm.reach_end = 0;
04849         }
04850       }
04851 
04852       min = distance_multiply(nopt.len.min, qn->lower);
04853       if (IS_REPEAT_INFINITE(qn->upper))
04854         max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
04855       else
04856         max = distance_multiply(nopt.len.max, qn->upper);
04857 
04858       set_mml(&opt->len, min, max);
04859     }
04860     break;
04861 
04862   case NT_ENCLOSE:
04863     {
04864       EncloseNode* en = NENCLOSE(node);
04865 
04866       switch (en->type) {
04867       case ENCLOSE_OPTION:
04868         {
04869           OnigOptionType save = env->options;
04870 
04871           env->options = en->option;
04872           r = optimize_node_left(en->target, opt, env);
04873           env->options = save;
04874         }
04875         break;
04876 
04877       case ENCLOSE_MEMORY:
04878 #ifdef USE_SUBEXP_CALL
04879         en->opt_count++;
04880         if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
04881           OnigDistance min, max;
04882 
04883           min = 0;
04884           max = ONIG_INFINITE_DISTANCE;
04885           if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
04886           if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
04887           set_mml(&opt->len, min, max);
04888         }
04889         else
04890 #endif
04891         {
04892           r = optimize_node_left(en->target, opt, env);
04893 
04894           if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
04895             if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
04896               remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
04897           }
04898         }
04899         break;
04900 
04901       case ENCLOSE_STOP_BACKTRACK:
04902         r = optimize_node_left(en->target, opt, env);
04903         break;
04904       }
04905     }
04906     break;
04907 
04908   default:
04909 #ifdef ONIG_DEBUG
04910     if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
04911             NTYPE(node));
04912 #endif
04913     r = ONIGERR_TYPE_BUG;
04914     break;
04915   }
04916 
04917   return r;
04918 }
04919 
04920 static int
04921 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
04922 {
04923   int r;
04924 
04925   if (e->len == 0) return 0;
04926 
04927   if (e->ignore_case) {
04928     reg->exact = (UChar* )xmalloc(e->len);
04929     CHECK_NULL_RETURN_MEMERR(reg->exact);
04930     xmemcpy(reg->exact, e->s, e->len);
04931     reg->exact_end = reg->exact + e->len;
04932     reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
04933   }
04934   else {
04935     int allow_reverse;
04936 
04937     reg->exact = str_dup(e->s, e->s + e->len);
04938     CHECK_NULL_RETURN_MEMERR(reg->exact);
04939     reg->exact_end = reg->exact + e->len;
04940 
04941     allow_reverse =
04942         ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
04943 
04944     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
04945       r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
04946                       reg->map, &(reg->int_map));
04947       if (r) return r;
04948 
04949       reg->optimize = (allow_reverse != 0
04950                        ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
04951     }
04952     else {
04953       reg->optimize = ONIG_OPTIMIZE_EXACT;
04954     }
04955   }
04956 
04957   reg->dmin = e->mmd.min;
04958   reg->dmax = e->mmd.max;
04959 
04960   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04961     reg->threshold_len = (int)(reg->dmin + (reg->exact_end - reg->exact));
04962   }
04963 
04964   return 0;
04965 }
04966 
04967 static void
04968 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
04969 {
04970   int i;
04971 
04972   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
04973     reg->map[i] = m->map[i];
04974 
04975   reg->optimize   = ONIG_OPTIMIZE_MAP;
04976   reg->dmin       = m->mmd.min;
04977   reg->dmax       = m->mmd.max;
04978 
04979   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04980     reg->threshold_len = (int)(reg->dmin + 1);
04981   }
04982 }
04983 
04984 static void
04985 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
04986 {
04987   reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
04988   reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
04989 }
04990 
04991 #ifdef ONIG_DEBUG
04992 static void print_optimize_info(FILE* f, regex_t* reg);
04993 #endif
04994 
04995 static int
04996 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
04997 {
04998 
04999   int r;
05000   NodeOptInfo opt;
05001   OptEnv env;
05002 
05003   env.enc            = reg->enc;
05004   env.options        = reg->options;
05005   env.case_fold_flag = reg->case_fold_flag;
05006   env.scan_env   = scan_env;
05007   clear_mml(&env.mmd);
05008 
05009   r = optimize_node_left(node, &opt, &env);
05010   if (r) return r;
05011 
05012   reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
05013         ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
05014         ANCHOR_LOOK_BEHIND);
05015 
05016   reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
05017 
05018   if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
05019     reg->anchor_dmin = opt.len.min;
05020     reg->anchor_dmax = opt.len.max;
05021   }
05022 
05023   if (opt.exb.len > 0 || opt.exm.len > 0) {
05024     select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
05025     if (opt.map.value > 0 &&
05026         comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
05027       goto set_map;
05028     }
05029     else {
05030       r = set_optimize_exact_info(reg, &opt.exb);
05031       set_sub_anchor(reg, &opt.exb.anc);
05032     }
05033   }
05034   else if (opt.map.value > 0) {
05035   set_map:
05036     set_optimize_map_info(reg, &opt.map);
05037     set_sub_anchor(reg, &opt.map.anc);
05038   }
05039   else {
05040     reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
05041     if (opt.len.max == 0)
05042       reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
05043   }
05044 
05045 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
05046   if (!onig_is_prelude()) print_optimize_info(stderr, reg);
05047 #endif
05048   return r;
05049 }
05050 
05051 static void
05052 clear_optimize_info(regex_t* reg)
05053 {
05054   reg->optimize      = ONIG_OPTIMIZE_NONE;
05055   reg->anchor        = 0;
05056   reg->anchor_dmin   = 0;
05057   reg->anchor_dmax   = 0;
05058   reg->sub_anchor    = 0;
05059   reg->exact_end     = (UChar* )NULL;
05060   reg->threshold_len = 0;
05061   if (IS_NOT_NULL(reg->exact)) {
05062     xfree(reg->exact);
05063     reg->exact = (UChar* )NULL;
05064   }
05065 }
05066 
05067 #ifdef ONIG_DEBUG
05068 
05069 static void print_enc_string(FILE* fp, OnigEncoding enc,
05070                              const UChar *s, const UChar *end)
05071 {
05072   fprintf(fp, "\nPATTERN: /");
05073 
05074   if (ONIGENC_MBC_MINLEN(enc) > 1) {
05075     const UChar *p;
05076     OnigCodePoint code;
05077 
05078     p = s;
05079     while (p < end) {
05080       code = ONIGENC_MBC_TO_CODE(enc, p, end);
05081       if (code >= 0x80) {
05082         fprintf(fp, " 0x%04x ", (int )code);
05083       }
05084       else {
05085         fputc((int )code, fp);
05086       }
05087 
05088       p += enclen(enc, p, end);
05089     }
05090   }
05091   else {
05092     while (s < end) {
05093       fputc((int )*s, fp);
05094       s++;
05095     }
05096   }
05097 
05098   fprintf(fp, "/ (%s)\n", enc->name);
05099 }
05100 
05101 static void
05102 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
05103 {
05104   if (a == ONIG_INFINITE_DISTANCE)
05105     fputs("inf", f);
05106   else
05107     fprintf(f, "(%"PRIuSIZE")", a);
05108 
05109   fputs("-", f);
05110 
05111   if (b == ONIG_INFINITE_DISTANCE)
05112     fputs("inf", f);
05113   else
05114     fprintf(f, "(%"PRIuSIZE")", b);
05115 }
05116 
05117 static void
05118 print_anchor(FILE* f, int anchor)
05119 {
05120   int q = 0;
05121 
05122   fprintf(f, "[");
05123 
05124   if (anchor & ANCHOR_BEGIN_BUF) {
05125     fprintf(f, "begin-buf");
05126     q = 1;
05127   }
05128   if (anchor & ANCHOR_BEGIN_LINE) {
05129     if (q) fprintf(f, ", ");
05130     q = 1;
05131     fprintf(f, "begin-line");
05132   }
05133   if (anchor & ANCHOR_BEGIN_POSITION) {
05134     if (q) fprintf(f, ", ");
05135     q = 1;
05136     fprintf(f, "begin-pos");
05137   }
05138   if (anchor & ANCHOR_END_BUF) {
05139     if (q) fprintf(f, ", ");
05140     q = 1;
05141     fprintf(f, "end-buf");
05142   }
05143   if (anchor & ANCHOR_SEMI_END_BUF) {
05144     if (q) fprintf(f, ", ");
05145     q = 1;
05146     fprintf(f, "semi-end-buf");
05147   }
05148   if (anchor & ANCHOR_END_LINE) {
05149     if (q) fprintf(f, ", ");
05150     q = 1;
05151     fprintf(f, "end-line");
05152   }
05153   if (anchor & ANCHOR_ANYCHAR_STAR) {
05154     if (q) fprintf(f, ", ");
05155     q = 1;
05156     fprintf(f, "anychar-star");
05157   }
05158   if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
05159     if (q) fprintf(f, ", ");
05160     fprintf(f, "anychar-star-pl");
05161   }
05162 
05163   fprintf(f, "]");
05164 }
05165 
05166 static void
05167 print_optimize_info(FILE* f, regex_t* reg)
05168 {
05169   static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
05170                               "EXACT_IC", "MAP" };
05171 
05172   fprintf(f, "optimize: %s\n", on[reg->optimize]);
05173   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
05174   if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
05175     print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
05176   fprintf(f, "\n");
05177 
05178   if (reg->optimize) {
05179     fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
05180     fprintf(f, "\n");
05181   }
05182   fprintf(f, "\n");
05183 
05184   if (reg->exact) {
05185     UChar *p;
05186     fprintf(f, "exact: [");
05187     for (p = reg->exact; p < reg->exact_end; p++) {
05188       fputc(*p, f);
05189     }
05190     fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
05191   }
05192   else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
05193     int c, i, n = 0;
05194 
05195     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
05196       if (reg->map[i]) n++;
05197 
05198     fprintf(f, "map: n=%d\n", n);
05199     if (n > 0) {
05200       c = 0;
05201       fputc('[', f);
05202       for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
05203         if (reg->map[i] != 0) {
05204           if (c > 0)  fputs(", ", f);
05205           c++;
05206           if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
05207               ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
05208             fputc(i, f);
05209           else
05210             fprintf(f, "%d", i);
05211         }
05212       }
05213       fprintf(f, "]\n");
05214     }
05215   }
05216 }
05217 #endif /* ONIG_DEBUG */
05218 
05219 
05220 extern void
05221 onig_free_body(regex_t* reg)
05222 {
05223   if (IS_NOT_NULL(reg)) {
05224     if (IS_NOT_NULL(reg->p))                xfree(reg->p);
05225     if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
05226     if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
05227     if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
05228     if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
05229     if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
05230 
05231 #ifdef USE_NAMED_GROUP
05232     onig_names_free(reg);
05233 #endif
05234   }
05235 }
05236 
05237 extern void
05238 onig_free(regex_t* reg)
05239 {
05240   if (IS_NOT_NULL(reg)) {
05241     onig_free_body(reg);
05242     xfree(reg);
05243   }
05244 }
05245 
05246 size_t
05247 onig_memsize(const regex_t *reg)
05248 {
05249     size_t size = sizeof(regex_t);
05250     if (IS_NOT_NULL(reg->p))                size += reg->alloc;
05251     if (IS_NOT_NULL(reg->exact))            size += reg->exact_end - reg->exact;
05252     if (IS_NOT_NULL(reg->int_map))          size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05253     if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05254     if (IS_NOT_NULL(reg->repeat_range))     size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
05255     if (IS_NOT_NULL(reg->chain))            size += onig_memsize(reg->chain);
05256 
05257     return size;
05258 }
05259 
05260 #define REGEX_TRANSFER(to,from) do {\
05261   (to)->state = ONIG_STATE_MODIFY;\
05262   onig_free_body(to);\
05263   xmemcpy(to, from, sizeof(regex_t));\
05264   xfree(from);\
05265 } while (0)
05266 
05267 extern void
05268 onig_transfer(regex_t* to, regex_t* from)
05269 {
05270   THREAD_ATOMIC_START;
05271   REGEX_TRANSFER(to, from);
05272   THREAD_ATOMIC_END;
05273 }
05274 
05275 #define REGEX_CHAIN_HEAD(reg) do {\
05276   while (IS_NOT_NULL((reg)->chain)) {\
05277     (reg) = (reg)->chain;\
05278   }\
05279 } while (0)
05280 
05281 extern void
05282 onig_chain_link_add(regex_t* to, regex_t* add)
05283 {
05284   THREAD_ATOMIC_START;
05285   REGEX_CHAIN_HEAD(to);
05286   to->chain = add;
05287   THREAD_ATOMIC_END;
05288 }
05289 
05290 extern void
05291 onig_chain_reduce(regex_t* reg)
05292 {
05293   regex_t *head, *prev;
05294 
05295   prev = reg;
05296   head = prev->chain;
05297   if (IS_NOT_NULL(head)) {
05298     reg->state = ONIG_STATE_MODIFY;
05299     while (IS_NOT_NULL(head->chain)) {
05300       prev = head;
05301       head = head->chain;
05302     }
05303     prev->chain = (regex_t* )NULL;
05304     REGEX_TRANSFER(reg, head);
05305   }
05306 }
05307 
05308 #ifdef ONIG_DEBUG
05309 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
05310 #endif
05311 #ifdef ONIG_DEBUG_PARSE_TREE
05312 static void print_tree P_((FILE* f, Node* node));
05313 #endif
05314 
05315 extern int
05316 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05317               OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
05318 {
05319 #define COMPILE_INIT_SIZE  20
05320 
05321   int r;
05322   OnigDistance init_size;
05323   Node*  root;
05324   ScanEnv  scan_env = {0};
05325 #ifdef USE_SUBEXP_CALL
05326   UnsetAddrList  uslist;
05327 #endif
05328 
05329   if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
05330 
05331   scan_env.sourcefile = sourcefile;
05332   scan_env.sourceline = sourceline;
05333   reg->state = ONIG_STATE_COMPILING;
05334 
05335 #ifdef ONIG_DEBUG
05336   if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
05337 #endif
05338 
05339   if (reg->alloc == 0) {
05340     init_size = (pattern_end - pattern) * 2;
05341     if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
05342     r = BBUF_INIT(reg, init_size);
05343     if (r != 0) goto end;
05344   }
05345   else
05346     reg->used = 0;
05347 
05348   reg->num_mem            = 0;
05349   reg->num_repeat         = 0;
05350   reg->num_null_check     = 0;
05351   reg->repeat_range_alloc = 0;
05352   reg->repeat_range       = (OnigRepeatRange* )NULL;
05353 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05354   reg->num_comb_exp_check = 0;
05355 #endif
05356 
05357   r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
05358   if (r != 0) goto err;
05359 
05360 #ifdef ONIG_DEBUG_PARSE_TREE
05361 # if 0
05362   fprintf(stderr, "ORIGINAL PARSE TREE:\n");
05363   if (!onig_is_prelude()) {
05364     print_tree(stderr, root);
05365   }
05366 # endif
05367 #endif
05368 
05369 #ifdef USE_NAMED_GROUP
05370   /* mixed use named group and no-named group */
05371   if (scan_env.num_named > 0 &&
05372       IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
05373       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
05374     if (scan_env.num_named != scan_env.num_mem)
05375       r = disable_noname_group_capture(&root, reg, &scan_env);
05376     else
05377       r = numbered_ref_check(root);
05378 
05379     if (r != 0) goto err;
05380   }
05381 #endif
05382 
05383 #ifdef USE_SUBEXP_CALL
05384   if (scan_env.num_call > 0) {
05385     r = unset_addr_list_init(&uslist, scan_env.num_call);
05386     if (r != 0) goto err;
05387     scan_env.unset_addr_list = &uslist;
05388     r = setup_subexp_call(root, &scan_env);
05389     if (r != 0) goto err_unset;
05390     r = subexp_recursive_check_trav(root, &scan_env);
05391     if (r  < 0) goto err_unset;
05392     r = subexp_inf_recursive_check_trav(root, &scan_env);
05393     if (r != 0) goto err_unset;
05394 
05395     reg->num_call = scan_env.num_call;
05396   }
05397   else
05398     reg->num_call = 0;
05399 #endif
05400 
05401   r = setup_tree(root, reg, 0, &scan_env);
05402   if (r != 0) goto err_unset;
05403 
05404 #ifdef ONIG_DEBUG_PARSE_TREE
05405   if (!onig_is_prelude()) print_tree(stderr, root);
05406 #endif
05407 
05408   reg->capture_history  = scan_env.capture_history;
05409   reg->bt_mem_start     = scan_env.bt_mem_start;
05410   reg->bt_mem_start    |= reg->capture_history;
05411   if (IS_FIND_CONDITION(reg->options))
05412     BIT_STATUS_ON_ALL(reg->bt_mem_end);
05413   else {
05414     reg->bt_mem_end  = scan_env.bt_mem_end;
05415     reg->bt_mem_end |= reg->capture_history;
05416   }
05417 
05418 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05419   if (scan_env.backrefed_mem == 0
05420 #ifdef USE_SUBEXP_CALL
05421       || scan_env.num_call == 0
05422 #endif
05423       ) {
05424     setup_comb_exp_check(root, 0, &scan_env);
05425 #ifdef USE_SUBEXP_CALL
05426     if (scan_env.has_recursion != 0) {
05427       scan_env.num_comb_exp_check = 0;
05428     }
05429     else
05430 #endif
05431     if (scan_env.comb_exp_max_regnum > 0) {
05432       int i;
05433       for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
05434         if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
05435           scan_env.num_comb_exp_check = 0;
05436           break;
05437         }
05438       }
05439     }
05440   }
05441 
05442   reg->num_comb_exp_check = scan_env.num_comb_exp_check;
05443 #endif
05444 
05445   clear_optimize_info(reg);
05446 #ifndef ONIG_DONT_OPTIMIZE
05447   r = set_optimize_info_from_tree(root, reg, &scan_env);
05448   if (r != 0) goto err_unset;
05449 #endif
05450 
05451   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
05452     xfree(scan_env.mem_nodes_dynamic);
05453     scan_env.mem_nodes_dynamic = (Node** )NULL;
05454   }
05455 
05456   r = compile_tree(root, reg);
05457   if (r == 0) {
05458     r = add_opcode(reg, OP_END);
05459 #ifdef USE_SUBEXP_CALL
05460     if (scan_env.num_call > 0) {
05461       r = unset_addr_list_fix(&uslist, reg);
05462       unset_addr_list_end(&uslist);
05463       if (r) goto err;
05464     }
05465 #endif
05466 
05467     if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
05468       reg->stack_pop_level = STACK_POP_LEVEL_ALL;
05469     else {
05470       if (reg->bt_mem_start != 0)
05471         reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
05472       else
05473         reg->stack_pop_level = STACK_POP_LEVEL_FREE;
05474     }
05475   }
05476 #ifdef USE_SUBEXP_CALL
05477   else if (scan_env.num_call > 0) {
05478     unset_addr_list_end(&uslist);
05479   }
05480 #endif
05481   onig_node_free(root);
05482 
05483 #ifdef ONIG_DEBUG_COMPILE
05484 #ifdef USE_NAMED_GROUP
05485   if (!onig_is_prelude()) onig_print_names(stderr, reg);
05486 #endif
05487   if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
05488 #endif
05489 
05490  end:
05491   reg->state = ONIG_STATE_NORMAL;
05492   return r;
05493 
05494  err_unset:
05495 #ifdef USE_SUBEXP_CALL
05496   if (scan_env.num_call > 0) {
05497     unset_addr_list_end(&uslist);
05498   }
05499 #endif
05500  err:
05501   if (IS_NOT_NULL(scan_env.error)) {
05502     if (IS_NOT_NULL(einfo)) {
05503       einfo->enc     = scan_env.enc;
05504       einfo->par     = scan_env.error;
05505       einfo->par_end = scan_env.error_end;
05506     }
05507   }
05508 
05509   onig_node_free(root);
05510   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
05511       xfree(scan_env.mem_nodes_dynamic);
05512   return r;
05513 }
05514 
05515 #ifdef USE_RECOMPILE_API
05516 extern int
05517 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05518             OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
05519             OnigErrorInfo* einfo)
05520 {
05521   int r;
05522   regex_t *new_reg;
05523 
05524   r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
05525   if (r) return r;
05526   if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
05527     onig_transfer(reg, new_reg);
05528   }
05529   else {
05530     onig_chain_link_add(reg, new_reg);
05531   }
05532   return 0;
05533 }
05534 #endif
05535 
05536 static int onig_inited = 0;
05537 
05538 extern int
05539 onig_reg_init(regex_t* reg, OnigOptionType option,
05540               OnigCaseFoldType case_fold_flag,
05541               OnigEncoding enc, const OnigSyntaxType* syntax)
05542 {
05543   if (! onig_inited)
05544     onig_init();
05545 
05546   if (IS_NULL(reg))
05547     return ONIGERR_INVALID_ARGUMENT;
05548 
05549   if (ONIGENC_IS_UNDEF(enc))
05550     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
05551 
05552   if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
05553       == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
05554     return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
05555   }
05556 
05557   (reg)->state = ONIG_STATE_MODIFY;
05558 
05559   if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
05560     option |= syntax->options;
05561     option &= ~ONIG_OPTION_SINGLELINE;
05562   }
05563   else
05564     option |= syntax->options;
05565 
05566   (reg)->enc              = enc;
05567   (reg)->options          = option;
05568   (reg)->syntax           = syntax;
05569   (reg)->optimize         = 0;
05570   (reg)->exact            = (UChar* )NULL;
05571   (reg)->int_map          = (int* )NULL;
05572   (reg)->int_map_backward = (int* )NULL;
05573   (reg)->chain            = (regex_t* )NULL;
05574 
05575   (reg)->p                = (UChar* )NULL;
05576   (reg)->alloc            = 0;
05577   (reg)->used             = 0;
05578   (reg)->name_table       = (void* )NULL;
05579 
05580   (reg)->case_fold_flag   = case_fold_flag;
05581   return 0;
05582 }
05583 
05584 extern int
05585 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
05586           const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
05587           OnigSyntaxType* syntax, OnigErrorInfo* einfo)
05588 {
05589   int r;
05590 
05591   r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05592   if (r) return r;
05593 
05594   r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
05595   return r;
05596 }
05597 
05598 extern int
05599 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
05600           OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
05601           OnigErrorInfo* einfo)
05602 {
05603   int r;
05604 
05605   *reg = (regex_t* )xmalloc(sizeof(regex_t));
05606   if (IS_NULL(*reg)) return ONIGERR_MEMORY;
05607 
05608   r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05609   if (r) goto err;
05610 
05611   r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
05612   if (r) {
05613   err:
05614     onig_free(*reg);
05615     *reg = NULL;
05616   }
05617   return r;
05618 }
05619 
05620 
05621 extern int
05622 onig_init(void)
05623 {
05624   if (onig_inited != 0)
05625     return 0;
05626 
05627   THREAD_SYSTEM_INIT;
05628   THREAD_ATOMIC_START;
05629 
05630   onig_inited = 1;
05631 
05632   onigenc_init();
05633   /* onigenc_set_default_caseconv_table((UChar* )0); */
05634 
05635 #ifdef ONIG_DEBUG_STATISTICS
05636   onig_statistics_init();
05637 #endif
05638 
05639   THREAD_ATOMIC_END;
05640   return 0;
05641 }
05642 
05643 
05644 extern int
05645 onig_end(void)
05646 {
05647   THREAD_ATOMIC_START;
05648 
05649 #ifdef ONIG_DEBUG_STATISTICS
05650   if (!onig_is_prelude()) onig_print_statistics(stderr);
05651 #endif
05652 
05653 #ifdef USE_SHARED_CCLASS_TABLE
05654   onig_free_shared_cclass_table();
05655 #endif
05656 
05657 #ifdef USE_PARSE_TREE_NODE_RECYCLE
05658   onig_free_node_list();
05659 #endif
05660 
05661   onig_inited = 0;
05662 
05663   THREAD_ATOMIC_END;
05664   THREAD_SYSTEM_END;
05665   return 0;
05666 }
05667 
05668 extern int
05669 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
05670 {
05671   OnigCodePoint n, *data;
05672   OnigCodePoint low, high, x;
05673 
05674   GET_CODE_POINT(n, p);
05675   data = (OnigCodePoint* )p;
05676   data++;
05677 
05678   for (low = 0, high = n; low < high; ) {
05679     x = (low + high) >> 1;
05680     if (code > data[x * 2 + 1])
05681       low = x + 1;
05682     else
05683       high = x;
05684   }
05685 
05686   return ((low < n && code >= data[low * 2]) ? 1 : 0);
05687 }
05688 
05689 extern int
05690 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
05691 {
05692   int found;
05693 
05694   if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
05695     if (IS_NULL(cc->mbuf)) {
05696       found = 0;
05697     }
05698     else {
05699       found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
05700     }
05701   }
05702   else {
05703     found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
05704   }
05705 
05706   if (IS_NCCLASS_NOT(cc))
05707     return !found;
05708   else
05709     return found;
05710 }
05711 
05712 extern int
05713 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
05714 {
05715   int len;
05716 
05717   if (ONIGENC_MBC_MINLEN(enc) > 1) {
05718     len = 2;
05719   }
05720   else {
05721     len = ONIGENC_CODE_TO_MBCLEN(enc, code);
05722   }
05723   return onig_is_code_in_cc_len(len, code, cc);
05724 }
05725 
05726 
05727 #ifdef ONIG_DEBUG
05728 
05729 /* arguments type */
05730 #define ARG_SPECIAL     -1
05731 #define ARG_NON          0
05732 #define ARG_RELADDR      1
05733 #define ARG_ABSADDR      2
05734 #define ARG_LENGTH       3
05735 #define ARG_MEMNUM       4
05736 #define ARG_OPTION       5
05737 #define ARG_STATE_CHECK  6
05738 
05739 OnigOpInfoType OnigOpInfo[] = {
05740   { OP_FINISH,            "finish",          ARG_NON },
05741   { OP_END,               "end",             ARG_NON },
05742   { OP_EXACT1,            "exact1",          ARG_SPECIAL },
05743   { OP_EXACT2,            "exact2",          ARG_SPECIAL },
05744   { OP_EXACT3,            "exact3",          ARG_SPECIAL },
05745   { OP_EXACT4,            "exact4",          ARG_SPECIAL },
05746   { OP_EXACT5,            "exact5",          ARG_SPECIAL },
05747   { OP_EXACTN,            "exactn",          ARG_SPECIAL },
05748   { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
05749   { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
05750   { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
05751   { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
05752   { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
05753   { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
05754   { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
05755   { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
05756   { OP_CCLASS,            "cclass",          ARG_SPECIAL },
05757   { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
05758   { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
05759   { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
05760   { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
05761   { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
05762   { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL },
05763   { OP_ANYCHAR,           "anychar",         ARG_NON },
05764   { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
05765   { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
05766   { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
05767   { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
05768   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
05769   { OP_WORD,                "word",            ARG_NON },
05770   { OP_NOT_WORD,            "not-word",        ARG_NON },
05771   { OP_WORD_BOUND,          "word-bound",      ARG_NON },
05772   { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
05773   { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
05774   { OP_WORD_END,            "word-end",        ARG_NON },
05775   { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
05776   { OP_END_BUF,             "end-buf",         ARG_NON },
05777   { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
05778   { OP_END_LINE,            "end-line",        ARG_NON },
05779   { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
05780   { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
05781   { OP_BACKREF1,            "backref1",             ARG_NON },
05782   { OP_BACKREF2,            "backref2",             ARG_NON },
05783   { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
05784   { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
05785   { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
05786   { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
05787   { OP_BACKREF_WITH_LEVEL,    "backref_at_level",     ARG_SPECIAL },
05788   { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
05789   { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
05790   { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
05791   { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
05792   { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
05793   { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
05794   { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
05795   { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
05796   { OP_FAIL,                "fail",                 ARG_NON },
05797   { OP_JUMP,                "jump",                 ARG_RELADDR },
05798   { OP_PUSH,                "push",                 ARG_RELADDR },
05799   { OP_POP,                 "pop",                  ARG_NON },
05800   { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
05801   { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
05802   { OP_REPEAT,              "repeat",               ARG_SPECIAL },
05803   { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
05804   { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
05805   { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
05806   { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
05807   { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
05808   { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
05809   { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
05810   { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
05811   { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
05812   { OP_PUSH_POS,             "push-pos",             ARG_NON },
05813   { OP_POP_POS,              "pop-pos",              ARG_NON },
05814   { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
05815   { OP_FAIL_POS,             "fail-pos",             ARG_NON },
05816   { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
05817   { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
05818   { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
05819   { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
05820   { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
05821   { OP_CALL,                 "call",                 ARG_ABSADDR },
05822   { OP_RETURN,               "return",               ARG_NON },
05823   { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
05824   { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
05825   { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
05826   { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
05827   { OP_STATE_CHECK_ANYCHAR_ML_STAR,
05828     "state-check-anychar-ml*", ARG_STATE_CHECK },
05829   { -1, "", ARG_NON }
05830 };
05831 
05832 static const char*
05833 op2name(int opcode)
05834 {
05835   int i;
05836 
05837   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05838     if (opcode == OnigOpInfo[i].opcode)
05839       return OnigOpInfo[i].name;
05840   }
05841   return "";
05842 }
05843 
05844 static int
05845 op2arg_type(int opcode)
05846 {
05847   int i;
05848 
05849   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05850     if (opcode == OnigOpInfo[i].opcode)
05851       return OnigOpInfo[i].arg_type;
05852   }
05853   return ARG_SPECIAL;
05854 }
05855 
05856 static void
05857 Indent(FILE* f, int indent)
05858 {
05859   int i;
05860   for (i = 0; i < indent; i++) putc(' ', f);
05861 }
05862 
05863 static void
05864 p_string(FILE* f, int len, UChar* s)
05865 {
05866   fputs(":", f);
05867   while (len-- > 0) { fputc(*s++, f); }
05868 }
05869 
05870 static void
05871 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
05872 {
05873   int x = len * mb_len;
05874 
05875   fprintf(f, ":%d:", len);
05876   while (x-- > 0) { fputc(*s++, f); }
05877 }
05878 
05879 extern void
05880 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
05881                               OnigEncoding enc)
05882 {
05883   int i, n, arg_type;
05884   RelAddrType addr;
05885   LengthType len;
05886   MemNumType mem;
05887   StateCheckNumType scn;
05888   OnigCodePoint code;
05889   UChar *q;
05890 
05891   fprintf(f, "[%s", op2name(*bp));
05892   arg_type = op2arg_type(*bp);
05893   if (arg_type != ARG_SPECIAL) {
05894     bp++;
05895     switch (arg_type) {
05896     case ARG_NON:
05897       break;
05898     case ARG_RELADDR:
05899       GET_RELADDR_INC(addr, bp);
05900       fprintf(f, ":(%d)", addr);
05901       break;
05902     case ARG_ABSADDR:
05903       GET_ABSADDR_INC(addr, bp);
05904       fprintf(f, ":(%d)", addr);
05905       break;
05906     case ARG_LENGTH:
05907       GET_LENGTH_INC(len, bp);
05908       fprintf(f, ":%d", len);
05909       break;
05910     case ARG_MEMNUM:
05911       mem = *((MemNumType* )bp);
05912       bp += SIZE_MEMNUM;
05913       fprintf(f, ":%d", mem);
05914       break;
05915     case ARG_OPTION:
05916       {
05917         OnigOptionType option = *((OnigOptionType* )bp);
05918         bp += SIZE_OPTION;
05919         fprintf(f, ":%d", option);
05920       }
05921       break;
05922 
05923     case ARG_STATE_CHECK:
05924       scn = *((StateCheckNumType* )bp);
05925       bp += SIZE_STATE_CHECK_NUM;
05926       fprintf(f, ":%d", scn);
05927       break;
05928     }
05929   }
05930   else {
05931     switch (*bp++) {
05932     case OP_EXACT1:
05933     case OP_ANYCHAR_STAR_PEEK_NEXT:
05934     case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
05935       p_string(f, 1, bp++); break;
05936     case OP_EXACT2:
05937       p_string(f, 2, bp); bp += 2; break;
05938     case OP_EXACT3:
05939       p_string(f, 3, bp); bp += 3; break;
05940     case OP_EXACT4:
05941       p_string(f, 4, bp); bp += 4; break;
05942     case OP_EXACT5:
05943       p_string(f, 5, bp); bp += 5; break;
05944     case OP_EXACTN:
05945       GET_LENGTH_INC(len, bp);
05946       p_len_string(f, len, 1, bp);
05947       bp += len;
05948       break;
05949 
05950     case OP_EXACTMB2N1:
05951       p_string(f, 2, bp); bp += 2; break;
05952     case OP_EXACTMB2N2:
05953       p_string(f, 4, bp); bp += 4; break;
05954     case OP_EXACTMB2N3:
05955       p_string(f, 6, bp); bp += 6; break;
05956     case OP_EXACTMB2N:
05957       GET_LENGTH_INC(len, bp);
05958       p_len_string(f, len, 2, bp);
05959       bp += len * 2;
05960       break;
05961     case OP_EXACTMB3N:
05962       GET_LENGTH_INC(len, bp);
05963       p_len_string(f, len, 3, bp);
05964       bp += len * 3;
05965       break;
05966     case OP_EXACTMBN:
05967       {
05968         int mb_len;
05969 
05970         GET_LENGTH_INC(mb_len, bp);
05971         GET_LENGTH_INC(len, bp);
05972         fprintf(f, ":%d:%d:", mb_len, len);
05973         n = len * mb_len;
05974         while (n-- > 0) { fputc(*bp++, f); }
05975       }
05976       break;
05977 
05978     case OP_EXACT1_IC:
05979       len = enclen(enc, bp, bpend);
05980       p_string(f, len, bp);
05981       bp += len;
05982       break;
05983     case OP_EXACTN_IC:
05984       GET_LENGTH_INC(len, bp);
05985       p_len_string(f, len, 1, bp);
05986       bp += len;
05987       break;
05988 
05989     case OP_CCLASS:
05990       n = bitset_on_num((BitSetRef )bp);
05991       bp += SIZE_BITSET;
05992       fprintf(f, ":%d", n);
05993       break;
05994 
05995     case OP_CCLASS_NOT:
05996       n = bitset_on_num((BitSetRef )bp);
05997       bp += SIZE_BITSET;
05998       fprintf(f, ":%d", n);
05999       break;
06000 
06001     case OP_CCLASS_MB:
06002     case OP_CCLASS_MB_NOT:
06003       GET_LENGTH_INC(len, bp);
06004       q = bp;
06005 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06006       ALIGNMENT_RIGHT(q);
06007 #endif
06008       GET_CODE_POINT(code, q);
06009       bp += len;
06010       fprintf(f, ":%d:%d", (int )code, len);
06011       break;
06012 
06013     case OP_CCLASS_MIX:
06014     case OP_CCLASS_MIX_NOT:
06015       n = bitset_on_num((BitSetRef )bp);
06016       bp += SIZE_BITSET;
06017       GET_LENGTH_INC(len, bp);
06018       q = bp;
06019 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06020       ALIGNMENT_RIGHT(q);
06021 #endif
06022       GET_CODE_POINT(code, q);
06023       bp += len;
06024       fprintf(f, ":%d:%d:%d", n, (int )code, len);
06025       break;
06026 
06027     case OP_CCLASS_NODE:
06028       {
06029         CClassNode *cc;
06030 
06031         GET_POINTER_INC(cc, bp);
06032         n = bitset_on_num(cc->bs);
06033         fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
06034       }
06035       break;
06036 
06037     case OP_BACKREFN_IC:
06038       mem = *((MemNumType* )bp);
06039       bp += SIZE_MEMNUM;
06040       fprintf(f, ":%d", mem);
06041       break;
06042 
06043     case OP_BACKREF_MULTI_IC:
06044     case OP_BACKREF_MULTI:
06045       fputs(" ", f);
06046       GET_LENGTH_INC(len, bp);
06047       for (i = 0; i < len; i++) {
06048         GET_MEMNUM_INC(mem, bp);
06049         if (i > 0) fputs(", ", f);
06050         fprintf(f, "%d", mem);
06051       }
06052       break;
06053 
06054     case OP_BACKREF_WITH_LEVEL:
06055       {
06056         OnigOptionType option;
06057         LengthType level;
06058 
06059         GET_OPTION_INC(option, bp);
06060         fprintf(f, ":%d", option);
06061         GET_LENGTH_INC(level, bp);
06062         fprintf(f, ":%d", level);
06063 
06064         fputs(" ", f);
06065         GET_LENGTH_INC(len, bp);
06066         for (i = 0; i < len; i++) {
06067           GET_MEMNUM_INC(mem, bp);
06068           if (i > 0) fputs(", ", f);
06069           fprintf(f, "%d", mem);
06070         }
06071       }
06072       break;
06073 
06074     case OP_REPEAT:
06075     case OP_REPEAT_NG:
06076       {
06077         mem = *((MemNumType* )bp);
06078         bp += SIZE_MEMNUM;
06079         addr = *((RelAddrType* )bp);
06080         bp += SIZE_RELADDR;
06081         fprintf(f, ":%d:%d", mem, addr);
06082       }
06083       break;
06084 
06085     case OP_PUSH_OR_JUMP_EXACT1:
06086     case OP_PUSH_IF_PEEK_NEXT:
06087       addr = *((RelAddrType* )bp);
06088       bp += SIZE_RELADDR;
06089       fprintf(f, ":(%d)", addr);
06090       p_string(f, 1, bp);
06091       bp += 1;
06092       break;
06093 
06094     case OP_LOOK_BEHIND:
06095       GET_LENGTH_INC(len, bp);
06096       fprintf(f, ":%d", len);
06097       break;
06098 
06099     case OP_PUSH_LOOK_BEHIND_NOT:
06100       GET_RELADDR_INC(addr, bp);
06101       GET_LENGTH_INC(len, bp);
06102       fprintf(f, ":%d:(%d)", len, addr);
06103       break;
06104 
06105     case OP_STATE_CHECK_PUSH:
06106     case OP_STATE_CHECK_PUSH_OR_JUMP:
06107       scn = *((StateCheckNumType* )bp);
06108       bp += SIZE_STATE_CHECK_NUM;
06109       addr = *((RelAddrType* )bp);
06110       bp += SIZE_RELADDR;
06111       fprintf(f, ":%d:(%d)", scn, addr);
06112       break;
06113 
06114     default:
06115       fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
06116               *--bp);
06117     }
06118   }
06119   fputs("]", f);
06120   if (nextp) *nextp = bp;
06121 }
06122 
06123 static void
06124 print_compiled_byte_code_list(FILE* f, regex_t* reg)
06125 {
06126   int ncode;
06127   UChar* bp = reg->p;
06128   UChar* end = reg->p + reg->used;
06129 
06130   fprintf(f, "code length: %d\n", reg->used);
06131 
06132   ncode = -1;
06133   while (bp < end) {
06134     ncode++;
06135     if (bp > reg->p) {
06136       if (ncode % 5 == 0)
06137         fprintf(f, "\n%ld:", bp-reg->p);
06138       else
06139         fprintf(f, " %ld:", bp-reg->p);
06140     }
06141     onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
06142   }
06143 
06144   fprintf(f, "\n");
06145 }
06146 
06147 static void
06148 print_indent_tree(FILE* f, Node* node, int indent)
06149 {
06150   int i, type, container_p = 0;
06151   int add = 3;
06152   UChar* p;
06153 
06154   Indent(f, indent);
06155   if (IS_NULL(node)) {
06156     fprintf(f, "ERROR: null node!!!\n");
06157     exit (0);
06158   }
06159 
06160   type = NTYPE(node);
06161   switch (type) {
06162   case NT_LIST:
06163   case NT_ALT:
06164     if (NTYPE(node) == NT_LIST)
06165       fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
06166     else
06167       fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
06168 
06169     print_indent_tree(f, NCAR(node), indent + add);
06170     while (IS_NOT_NULL(node = NCDR(node))) {
06171       if (NTYPE(node) != type) {
06172         fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
06173         exit(0);
06174       }
06175       print_indent_tree(f, NCAR(node), indent + add);
06176     }
06177     break;
06178 
06179   case NT_STR:
06180     fprintf(f, "<string%s:%"PRIxPTR">",
06181             (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
06182     for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
06183       if (*p >= 0x20 && *p < 0x7f)
06184         fputc(*p, f);
06185       else {
06186         fprintf(f, " 0x%02x", *p);
06187       }
06188     }
06189     break;
06190 
06191   case NT_CCLASS:
06192     fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
06193     if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
06194     if (NCCLASS(node)->mbuf) {
06195       BBuf* bbuf = NCCLASS(node)->mbuf;
06196       for (i = 0; i < (int)bbuf->used; i++) {
06197         if (i > 0) fprintf(f, ",");
06198         fprintf(f, "%0x", bbuf->p[i]);
06199       }
06200     }
06201     break;
06202 
06203   case NT_CTYPE:
06204     fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
06205     switch (NCTYPE(node)->ctype) {
06206     case ONIGENC_CTYPE_WORD:
06207       if (NCTYPE(node)->not != 0)
06208         fputs("not word",       f);
06209       else
06210         fputs("word",           f);
06211       break;
06212 
06213     default:
06214       fprintf(f, "ERROR: undefined ctype.\n");
06215       exit(0);
06216     }
06217     break;
06218 
06219   case NT_CANY:
06220     fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
06221     break;
06222 
06223   case NT_ANCHOR:
06224     fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
06225     switch (NANCHOR(node)->type) {
06226     case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
06227     case ANCHOR_END_BUF:        fputs("end buf",        f); break;
06228     case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
06229     case ANCHOR_END_LINE:       fputs("end line",       f); break;
06230     case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
06231     case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
06232 
06233     case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
06234     case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
06235 #ifdef USE_WORD_BEGIN_END
06236     case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
06237     case ANCHOR_WORD_END:        fputs("word end", f);       break;
06238 #endif
06239     case ANCHOR_PREC_READ:       fputs("prec read",      f); container_p = TRUE; break;
06240     case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); container_p = TRUE; break;
06241     case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); container_p = TRUE; break;
06242     case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
06243 
06244     default:
06245       fprintf(f, "ERROR: undefined anchor type.\n");
06246       break;
06247     }
06248     break;
06249 
06250   case NT_BREF:
06251     {
06252       int* p;
06253       BRefNode* br = NBREF(node);
06254       p = BACKREFS_P(br);
06255       fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
06256       for (i = 0; i < br->back_num; i++) {
06257         if (i > 0) fputs(", ", f);
06258         fprintf(f, "%d", p[i]);
06259       }
06260     }
06261     break;
06262 
06263 #ifdef USE_SUBEXP_CALL
06264   case NT_CALL:
06265     {
06266       CallNode* cn = NCALL(node);
06267       fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
06268       p_string(f, cn->name_end - cn->name, cn->name);
06269     }
06270     break;
06271 #endif
06272 
06273   case NT_QTFR:
06274     fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
06275             NQTFR(node)->lower, NQTFR(node)->upper,
06276             (NQTFR(node)->greedy ? "" : "?"));
06277     print_indent_tree(f, NQTFR(node)->target, indent + add);
06278     break;
06279 
06280   case NT_ENCLOSE:
06281     fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
06282     switch (NENCLOSE(node)->type) {
06283     case ENCLOSE_OPTION:
06284       fprintf(f, "option:%d\n", NENCLOSE(node)->option);
06285       print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06286       break;
06287     case ENCLOSE_MEMORY:
06288       fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
06289       break;
06290     case ENCLOSE_STOP_BACKTRACK:
06291       fprintf(f, "stop-bt");
06292       break;
06293 
06294     default:
06295       break;
06296     }
06297     fprintf(f, "\n");
06298     print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06299     break;
06300 
06301   default:
06302     fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
06303     break;
06304   }
06305 
06306   if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
06307       type != NT_ENCLOSE)
06308     fprintf(f, "\n");
06309 
06310   if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
06311 
06312   fflush(f);
06313 }
06314 #endif /* ONIG_DEBUG */
06315 
06316 #ifdef ONIG_DEBUG_PARSE_TREE
06317 static void
06318 print_tree(FILE* f, Node* node)
06319 {
06320   print_indent_tree(f, node, 0);
06321 }
06322 #endif
06323