|
Ruby
1.9.3p448 (2013-06-27 revision 41675)
|
00001 /********************************************************************** 00002 00003 marshal.c - 00004 00005 $Author: usa $ 00006 created at: Thu Apr 27 16:30:01 JST 1995 00007 00008 Copyright (C) 1993-2007 Yukihiro Matsumoto 00009 00010 **********************************************************************/ 00011 00012 #include "ruby/ruby.h" 00013 #include "ruby/io.h" 00014 #include "ruby/st.h" 00015 #include "ruby/util.h" 00016 #include "ruby/encoding.h" 00017 #include "internal.h" 00018 00019 #include <math.h> 00020 #ifdef HAVE_FLOAT_H 00021 #include <float.h> 00022 #endif 00023 #ifdef HAVE_IEEEFP_H 00024 #include <ieeefp.h> 00025 #endif 00026 00027 #define BITSPERSHORT (2*CHAR_BIT) 00028 #define SHORTMASK ((1<<BITSPERSHORT)-1) 00029 #define SHORTDN(x) RSHIFT((x),BITSPERSHORT) 00030 00031 #if SIZEOF_SHORT == SIZEOF_BDIGITS 00032 #define SHORTLEN(x) (x) 00033 #else 00034 static long 00035 shortlen(long len, BDIGIT *ds) 00036 { 00037 BDIGIT num; 00038 int offset = 0; 00039 00040 num = ds[len-1]; 00041 while (num) { 00042 num = SHORTDN(num); 00043 offset++; 00044 } 00045 return (len - 1)*sizeof(BDIGIT)/2 + offset; 00046 } 00047 #define SHORTLEN(x) shortlen((x),d) 00048 #endif 00049 00050 #define MARSHAL_MAJOR 4 00051 #define MARSHAL_MINOR 8 00052 00053 #define TYPE_NIL '0' 00054 #define TYPE_TRUE 'T' 00055 #define TYPE_FALSE 'F' 00056 #define TYPE_FIXNUM 'i' 00057 00058 #define TYPE_EXTENDED 'e' 00059 #define TYPE_UCLASS 'C' 00060 #define TYPE_OBJECT 'o' 00061 #define TYPE_DATA 'd' 00062 #define TYPE_USERDEF 'u' 00063 #define TYPE_USRMARSHAL 'U' 00064 #define TYPE_FLOAT 'f' 00065 #define TYPE_BIGNUM 'l' 00066 #define TYPE_STRING '"' 00067 #define TYPE_REGEXP '/' 00068 #define TYPE_ARRAY '[' 00069 #define TYPE_HASH '{' 00070 #define TYPE_HASH_DEF '}' 00071 #define TYPE_STRUCT 'S' 00072 #define TYPE_MODULE_OLD 'M' 00073 #define TYPE_CLASS 'c' 00074 #define TYPE_MODULE 'm' 00075 00076 #define TYPE_SYMBOL ':' 00077 #define TYPE_SYMLINK ';' 00078 00079 #define 
TYPE_IVAR 'I' 00080 #define TYPE_LINK '@' 00081 00082 static ID s_dump, s_load, s_mdump, s_mload; 00083 static ID s_dump_data, s_load_data, s_alloc, s_call; 00084 static ID s_getbyte, s_read, s_write, s_binmode; 00085 00086 typedef struct { 00087 VALUE newclass; 00088 VALUE oldclass; 00089 VALUE (*dumper)(VALUE); 00090 VALUE (*loader)(VALUE, VALUE); 00091 } marshal_compat_t; 00092 00093 static st_table *compat_allocator_tbl; 00094 static VALUE compat_allocator_tbl_wrapper; 00095 00096 static int 00097 mark_marshal_compat_i(st_data_t key, st_data_t value) 00098 { 00099 marshal_compat_t *p = (marshal_compat_t *)value; 00100 rb_gc_mark(p->newclass); 00101 rb_gc_mark(p->oldclass); 00102 return ST_CONTINUE; 00103 } 00104 00105 static void 00106 mark_marshal_compat_t(void *tbl) 00107 { 00108 if (!tbl) return; 00109 st_foreach(tbl, mark_marshal_compat_i, 0); 00110 } 00111 00112 void 00113 rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE)) 00114 { 00115 marshal_compat_t *compat; 00116 rb_alloc_func_t allocator = rb_get_alloc_func(newclass); 00117 00118 if (!allocator) { 00119 rb_raise(rb_eTypeError, "no allocator"); 00120 } 00121 00122 compat = ALLOC(marshal_compat_t); 00123 compat->newclass = Qnil; 00124 compat->oldclass = Qnil; 00125 compat->newclass = newclass; 00126 compat->oldclass = oldclass; 00127 compat->dumper = dumper; 00128 compat->loader = loader; 00129 00130 st_insert(compat_allocator_tbl, (st_data_t)allocator, (st_data_t)compat); 00131 } 00132 00133 #define MARSHAL_INFECTION (FL_TAINT|FL_UNTRUSTED) 00134 typedef char ruby_check_marshal_viral_flags[MARSHAL_INFECTION == (int)MARSHAL_INFECTION ? 
1 : -1]; 00135 00136 struct dump_arg { 00137 VALUE str, dest; 00138 st_table *symbols; 00139 st_table *data; 00140 st_table *compat_tbl; 00141 st_table *encodings; 00142 int infection; 00143 }; 00144 00145 struct dump_call_arg { 00146 VALUE obj; 00147 struct dump_arg *arg; 00148 int limit; 00149 }; 00150 00151 static void 00152 check_dump_arg(struct dump_arg *arg, ID sym) 00153 { 00154 if (!arg->symbols) { 00155 rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s", 00156 rb_id2name(sym)); 00157 } 00158 } 00159 00160 static void clear_dump_arg(struct dump_arg *arg); 00161 00162 static void 00163 mark_dump_arg(void *ptr) 00164 { 00165 struct dump_arg *p = ptr; 00166 if (!p->symbols) 00167 return; 00168 rb_mark_set(p->data); 00169 rb_mark_hash(p->compat_tbl); 00170 rb_gc_mark(p->str); 00171 } 00172 00173 static void 00174 free_dump_arg(void *ptr) 00175 { 00176 clear_dump_arg(ptr); 00177 xfree(ptr); 00178 } 00179 00180 static size_t 00181 memsize_dump_arg(const void *ptr) 00182 { 00183 return ptr ? sizeof(struct dump_arg) : 0; 00184 } 00185 00186 static const rb_data_type_t dump_arg_data = { 00187 "dump_arg", 00188 {mark_dump_arg, free_dump_arg, memsize_dump_arg,}, 00189 }; 00190 00191 static const char * 00192 must_not_be_anonymous(const char *type, VALUE path) 00193 { 00194 char *n = RSTRING_PTR(path); 00195 00196 if (!rb_enc_asciicompat(rb_enc_get(path))) { 00197 /* cannot occur? */ 00198 rb_raise(rb_eTypeError, "can't dump non-ascii %s name", type); 00199 } 00200 if (n[0] == '#') { 00201 rb_raise(rb_eTypeError, "can't dump anonymous %s %.*s", type, 00202 (int)RSTRING_LEN(path), n); 00203 } 00204 return n; 00205 } 00206 00207 static VALUE 00208 class2path(VALUE klass) 00209 { 00210 VALUE path = rb_class_path(klass); 00211 const char *n; 00212 00213 n = must_not_be_anonymous((TYPE(klass) == T_CLASS ? 
"class" : "module"), path); 00214 if (rb_path_to_class(path) != rb_class_real(klass)) { 00215 rb_raise(rb_eTypeError, "%s can't be referred to", n); 00216 } 00217 return path; 00218 } 00219 00220 static void w_long(long, struct dump_arg*); 00221 static void w_encoding(VALUE obj, long num, struct dump_call_arg *arg); 00222 00223 static void 00224 w_nbyte(const char *s, long n, struct dump_arg *arg) 00225 { 00226 VALUE buf = arg->str; 00227 rb_str_buf_cat(buf, s, n); 00228 RBASIC(buf)->flags |= arg->infection; 00229 if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) { 00230 rb_io_write(arg->dest, buf); 00231 rb_str_resize(buf, 0); 00232 } 00233 } 00234 00235 static void 00236 w_byte(char c, struct dump_arg *arg) 00237 { 00238 w_nbyte(&c, 1, arg); 00239 } 00240 00241 static void 00242 w_bytes(const char *s, long n, struct dump_arg *arg) 00243 { 00244 w_long(n, arg); 00245 w_nbyte(s, n, arg); 00246 } 00247 00248 #define w_cstr(s, arg) w_bytes((s), strlen(s), (arg)) 00249 00250 static void 00251 w_short(int x, struct dump_arg *arg) 00252 { 00253 w_byte((char)((x >> 0) & 0xff), arg); 00254 w_byte((char)((x >> 8) & 0xff), arg); 00255 } 00256 00257 static void 00258 w_long(long x, struct dump_arg *arg) 00259 { 00260 char buf[sizeof(long)+1]; 00261 int i, len = 0; 00262 00263 #if SIZEOF_LONG > 4 00264 if (!(RSHIFT(x, 31) == 0 || RSHIFT(x, 31) == -1)) { 00265 /* big long does not fit in 4 bytes */ 00266 rb_raise(rb_eTypeError, "long too big to dump"); 00267 } 00268 #endif 00269 00270 if (x == 0) { 00271 w_byte(0, arg); 00272 return; 00273 } 00274 if (0 < x && x < 123) { 00275 w_byte((char)(x + 5), arg); 00276 return; 00277 } 00278 if (-124 < x && x < 0) { 00279 w_byte((char)((x - 5)&0xff), arg); 00280 return; 00281 } 00282 for (i=1;i<(int)sizeof(long)+1;i++) { 00283 buf[i] = (char)(x & 0xff); 00284 x = RSHIFT(x,8); 00285 if (x == 0) { 00286 buf[0] = i; 00287 break; 00288 } 00289 if (x == -1) { 00290 buf[0] = -i; 00291 break; 00292 } 00293 } 00294 len = i; 00295 for 
(i=0;i<=len;i++) { 00296 w_byte(buf[i], arg); 00297 } 00298 } 00299 00300 #ifdef DBL_MANT_DIG 00301 #define DECIMAL_MANT (53-16) /* from IEEE754 double precision */ 00302 00303 #if DBL_MANT_DIG > 32 00304 #define MANT_BITS 32 00305 #elif DBL_MANT_DIG > 24 00306 #define MANT_BITS 24 00307 #elif DBL_MANT_DIG > 16 00308 #define MANT_BITS 16 00309 #else 00310 #define MANT_BITS 8 00311 #endif 00312 00313 static double 00314 load_mantissa(double d, const char *buf, long len) 00315 { 00316 if (!len) return d; 00317 if (--len > 0 && !*buf++) { /* binary mantissa mark */ 00318 int e, s = d < 0, dig = 0; 00319 unsigned long m; 00320 00321 modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d); 00322 do { 00323 m = 0; 00324 switch (len) { 00325 default: m = *buf++ & 0xff; 00326 #if MANT_BITS > 24 00327 case 3: m = (m << 8) | (*buf++ & 0xff); 00328 #endif 00329 #if MANT_BITS > 16 00330 case 2: m = (m << 8) | (*buf++ & 0xff); 00331 #endif 00332 #if MANT_BITS > 8 00333 case 1: m = (m << 8) | (*buf++ & 0xff); 00334 #endif 00335 } 00336 dig -= len < MANT_BITS / 8 ? 
8 * (unsigned)len : MANT_BITS; 00337 d += ldexp((double)m, dig); 00338 } while ((len -= MANT_BITS / 8) > 0); 00339 d = ldexp(d, e - DECIMAL_MANT); 00340 if (s) d = -d; 00341 } 00342 return d; 00343 } 00344 #else 00345 #define load_mantissa(d, buf, len) (d) 00346 #endif 00347 00348 #ifdef DBL_DIG 00349 #define FLOAT_DIG (DBL_DIG+2) 00350 #else 00351 #define FLOAT_DIG 17 00352 #endif 00353 00354 static void 00355 w_float(double d, struct dump_arg *arg) 00356 { 00357 char *ruby_dtoa(double d_, int mode, int ndigits, int *decpt, int *sign, char **rve); 00358 char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10]; 00359 00360 if (isinf(d)) { 00361 if (d < 0) w_cstr("-inf", arg); 00362 else w_cstr("inf", arg); 00363 } 00364 else if (isnan(d)) { 00365 w_cstr("nan", arg); 00366 } 00367 else if (d == 0.0) { 00368 if (1.0/d < 0) w_cstr("-0", arg); 00369 else w_cstr("0", arg); 00370 } 00371 else { 00372 int decpt, sign, digs, len = 0; 00373 char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e); 00374 if (sign) buf[len++] = '-'; 00375 digs = (int)(e - p); 00376 if (decpt < -3 || decpt > digs) { 00377 buf[len++] = p[0]; 00378 if (--digs > 0) buf[len++] = '.'; 00379 memcpy(buf + len, p + 1, digs); 00380 len += digs; 00381 len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1); 00382 } 00383 else if (decpt > 0) { 00384 memcpy(buf + len, p, decpt); 00385 len += decpt; 00386 if ((digs -= decpt) > 0) { 00387 buf[len++] = '.'; 00388 memcpy(buf + len, p + decpt, digs); 00389 len += digs; 00390 } 00391 } 00392 else { 00393 buf[len++] = '0'; 00394 buf[len++] = '.'; 00395 if (decpt) { 00396 memset(buf + len, '0', -decpt); 00397 len -= decpt; 00398 } 00399 memcpy(buf + len, p, digs); 00400 len += digs; 00401 } 00402 xfree(p); 00403 w_bytes(buf, len, arg); 00404 } 00405 } 00406 00407 static void 00408 w_symbol(ID id, struct dump_arg *arg) 00409 { 00410 VALUE sym; 00411 st_data_t num; 00412 int encidx = -1; 00413 00414 if (st_lookup(arg->symbols, id, &num)) { 00415 w_byte(TYPE_SYMLINK, 
arg); 00416 w_long((long)num, arg); 00417 } 00418 else { 00419 sym = rb_id2str(id); 00420 if (!sym) { 00421 rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, id); 00422 } 00423 encidx = rb_enc_get_index(sym); 00424 if (encidx == rb_usascii_encindex() || 00425 rb_enc_str_coderange(sym) == ENC_CODERANGE_7BIT) { 00426 encidx = -1; 00427 } 00428 else { 00429 w_byte(TYPE_IVAR, arg); 00430 } 00431 w_byte(TYPE_SYMBOL, arg); 00432 w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg); 00433 st_add_direct(arg->symbols, id, arg->symbols->num_entries); 00434 if (encidx != -1) { 00435 struct dump_call_arg c_arg; 00436 c_arg.limit = 1; 00437 c_arg.arg = arg; 00438 w_encoding(sym, 0, &c_arg); 00439 } 00440 } 00441 } 00442 00443 static void 00444 w_unique(VALUE s, struct dump_arg *arg) 00445 { 00446 must_not_be_anonymous("class", s); 00447 w_symbol(rb_intern_str(s), arg); 00448 } 00449 00450 static void w_object(VALUE,struct dump_arg*,int); 00451 00452 static int 00453 hash_each(VALUE key, VALUE value, struct dump_call_arg *arg) 00454 { 00455 w_object(key, arg->arg, arg->limit); 00456 w_object(value, arg->arg, arg->limit); 00457 return ST_CONTINUE; 00458 } 00459 00460 static void 00461 w_extended(VALUE klass, struct dump_arg *arg, int check) 00462 { 00463 if (check && FL_TEST(klass, FL_SINGLETON)) { 00464 if (RCLASS_M_TBL(klass)->num_entries || 00465 (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1)) { 00466 rb_raise(rb_eTypeError, "singleton can't be dumped"); 00467 } 00468 klass = RCLASS_SUPER(klass); 00469 } 00470 while (BUILTIN_TYPE(klass) == T_ICLASS) { 00471 VALUE path = rb_class_name(RBASIC(klass)->klass); 00472 w_byte(TYPE_EXTENDED, arg); 00473 w_unique(path, arg); 00474 klass = RCLASS_SUPER(klass); 00475 } 00476 } 00477 00478 static void 00479 w_class(char type, VALUE obj, struct dump_arg *arg, int check) 00480 { 00481 VALUE path; 00482 st_data_t real_obj; 00483 VALUE klass; 00484 00485 if (st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) { 
00486 obj = (VALUE)real_obj; 00487 } 00488 klass = CLASS_OF(obj); 00489 w_extended(klass, arg, check); 00490 w_byte(type, arg); 00491 path = class2path(rb_class_real(klass)); 00492 w_unique(path, arg); 00493 } 00494 00495 static void 00496 w_uclass(VALUE obj, VALUE super, struct dump_arg *arg) 00497 { 00498 VALUE klass = CLASS_OF(obj); 00499 00500 w_extended(klass, arg, TRUE); 00501 klass = rb_class_real(klass); 00502 if (klass != super) { 00503 w_byte(TYPE_UCLASS, arg); 00504 w_unique(class2path(klass), arg); 00505 } 00506 } 00507 00508 static int 00509 w_obj_each(ID id, VALUE value, struct dump_call_arg *arg) 00510 { 00511 if (id == rb_id_encoding()) return ST_CONTINUE; 00512 if (id == rb_intern("E")) return ST_CONTINUE; 00513 w_symbol(id, arg->arg); 00514 w_object(value, arg->arg, arg->limit); 00515 return ST_CONTINUE; 00516 } 00517 00518 static void 00519 w_encoding(VALUE obj, long num, struct dump_call_arg *arg) 00520 { 00521 int encidx = rb_enc_get_index(obj); 00522 rb_encoding *enc = 0; 00523 st_data_t name; 00524 00525 if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) { 00526 w_long(num, arg->arg); 00527 return; 00528 } 00529 w_long(num + 1, arg->arg); 00530 00531 /* special treatment for US-ASCII and UTF-8 */ 00532 if (encidx == rb_usascii_encindex()) { 00533 w_symbol(rb_intern("E"), arg->arg); 00534 w_object(Qfalse, arg->arg, arg->limit + 1); 00535 return; 00536 } 00537 else if (encidx == rb_utf8_encindex()) { 00538 w_symbol(rb_intern("E"), arg->arg); 00539 w_object(Qtrue, arg->arg, arg->limit + 1); 00540 return; 00541 } 00542 00543 w_symbol(rb_id_encoding(), arg->arg); 00544 do { 00545 if (!arg->arg->encodings) 00546 arg->arg->encodings = st_init_strcasetable(); 00547 else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name)) 00548 break; 00549 name = (st_data_t)rb_str_new2(rb_enc_name(enc)); 00550 st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name); 00551 } while (0); 00552 w_object(name, arg->arg, arg->limit + 1); 
00553 } 00554 00555 static void 00556 w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg) 00557 { 00558 long num = tbl ? tbl->num_entries : 0; 00559 00560 w_encoding(obj, num, arg); 00561 if (tbl) { 00562 st_foreach_safe(tbl, w_obj_each, (st_data_t)arg); 00563 } 00564 } 00565 00566 static void 00567 w_objivar(VALUE obj, struct dump_call_arg *arg) 00568 { 00569 VALUE *ptr; 00570 long i, len, num; 00571 00572 len = ROBJECT_NUMIV(obj); 00573 ptr = ROBJECT_IVPTR(obj); 00574 num = 0; 00575 for (i = 0; i < len; i++) 00576 if (ptr[i] != Qundef) 00577 num += 1; 00578 00579 w_encoding(obj, num, arg); 00580 if (num != 0) { 00581 rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg); 00582 } 00583 } 00584 00585 static void 00586 w_object(VALUE obj, struct dump_arg *arg, int limit) 00587 { 00588 struct dump_call_arg c_arg; 00589 st_table *ivtbl = 0; 00590 st_data_t num; 00591 int hasiv = 0; 00592 #define has_ivars(obj, ivtbl) (((ivtbl) = rb_generic_ivar_table(obj)) != 0 || \ 00593 (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj))) 00594 00595 if (limit == 0) { 00596 rb_raise(rb_eArgError, "exceed depth limit"); 00597 } 00598 00599 limit--; 00600 c_arg.limit = limit; 00601 c_arg.arg = arg; 00602 00603 if (st_lookup(arg->data, obj, &num)) { 00604 w_byte(TYPE_LINK, arg); 00605 w_long((long)num, arg); 00606 return; 00607 } 00608 00609 if (obj == Qnil) { 00610 w_byte(TYPE_NIL, arg); 00611 } 00612 else if (obj == Qtrue) { 00613 w_byte(TYPE_TRUE, arg); 00614 } 00615 else if (obj == Qfalse) { 00616 w_byte(TYPE_FALSE, arg); 00617 } 00618 else if (FIXNUM_P(obj)) { 00619 #if SIZEOF_LONG <= 4 00620 w_byte(TYPE_FIXNUM, arg); 00621 w_long(FIX2INT(obj), arg); 00622 #else 00623 if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) { 00624 w_byte(TYPE_FIXNUM, arg); 00625 w_long(FIX2LONG(obj), arg); 00626 } 00627 else { 00628 w_object(rb_int2big(FIX2LONG(obj)), arg, limit); 00629 } 00630 #endif 00631 } 00632 else if (SYMBOL_P(obj)) { 00633 w_symbol(SYM2ID(obj), arg); 00634 } 
00635 else { 00636 arg->infection |= (int)FL_TEST(obj, MARSHAL_INFECTION); 00637 00638 if (rb_respond_to(obj, s_mdump)) { 00639 volatile VALUE v; 00640 00641 st_add_direct(arg->data, obj, arg->data->num_entries); 00642 00643 v = rb_funcall(obj, s_mdump, 0, 0); 00644 check_dump_arg(arg, s_mdump); 00645 hasiv = has_ivars(obj, ivtbl); 00646 if (hasiv) w_byte(TYPE_IVAR, arg); 00647 w_class(TYPE_USRMARSHAL, obj, arg, FALSE); 00648 w_object(v, arg, limit); 00649 if (hasiv) w_ivar(obj, ivtbl, &c_arg); 00650 return; 00651 } 00652 if (rb_respond_to(obj, s_dump)) { 00653 VALUE v; 00654 st_table *ivtbl2 = 0; 00655 int hasiv2; 00656 00657 v = rb_funcall(obj, s_dump, 1, INT2NUM(limit)); 00658 check_dump_arg(arg, s_dump); 00659 if (TYPE(v) != T_STRING) { 00660 rb_raise(rb_eTypeError, "_dump() must return string"); 00661 } 00662 hasiv = has_ivars(obj, ivtbl); 00663 if (hasiv) w_byte(TYPE_IVAR, arg); 00664 if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) { 00665 w_byte(TYPE_IVAR, arg); 00666 } 00667 w_class(TYPE_USERDEF, obj, arg, FALSE); 00668 w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg); 00669 if (hasiv2) { 00670 w_ivar(v, ivtbl2, &c_arg); 00671 } 00672 else if (hasiv) { 00673 w_ivar(obj, ivtbl, &c_arg); 00674 } 00675 st_add_direct(arg->data, obj, arg->data->num_entries); 00676 return; 00677 } 00678 00679 st_add_direct(arg->data, obj, arg->data->num_entries); 00680 00681 hasiv = has_ivars(obj, ivtbl); 00682 { 00683 st_data_t compat_data; 00684 rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass); 00685 if (st_lookup(compat_allocator_tbl, 00686 (st_data_t)allocator, 00687 &compat_data)) { 00688 marshal_compat_t *compat = (marshal_compat_t*)compat_data; 00689 VALUE real_obj = obj; 00690 obj = compat->dumper(real_obj); 00691 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj); 00692 if (obj != real_obj && !ivtbl) hasiv = 0; 00693 } 00694 } 00695 if (hasiv) w_byte(TYPE_IVAR, arg); 00696 00697 switch (BUILTIN_TYPE(obj)) { 00698 case T_CLASS: 00699 if 
(FL_TEST(obj, FL_SINGLETON)) { 00700 rb_raise(rb_eTypeError, "singleton class can't be dumped"); 00701 } 00702 w_byte(TYPE_CLASS, arg); 00703 { 00704 volatile VALUE path = class2path(obj); 00705 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg); 00706 } 00707 break; 00708 00709 case T_MODULE: 00710 w_byte(TYPE_MODULE, arg); 00711 { 00712 VALUE path = class2path(obj); 00713 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg); 00714 } 00715 break; 00716 00717 case T_FLOAT: 00718 w_byte(TYPE_FLOAT, arg); 00719 w_float(RFLOAT_VALUE(obj), arg); 00720 break; 00721 00722 case T_BIGNUM: 00723 w_byte(TYPE_BIGNUM, arg); 00724 { 00725 char sign = RBIGNUM_SIGN(obj) ? '+' : '-'; 00726 long len = RBIGNUM_LEN(obj); 00727 BDIGIT *d = RBIGNUM_DIGITS(obj); 00728 00729 w_byte(sign, arg); 00730 w_long(SHORTLEN(len), arg); /* w_short? */ 00731 while (len--) { 00732 #if SIZEOF_BDIGITS > SIZEOF_SHORT 00733 BDIGIT num = *d; 00734 int i; 00735 00736 for (i=0; i<SIZEOF_BDIGITS; i+=SIZEOF_SHORT) { 00737 w_short(num & SHORTMASK, arg); 00738 num = SHORTDN(num); 00739 if (len == 0 && num == 0) break; 00740 } 00741 #else 00742 w_short(*d, arg); 00743 #endif 00744 d++; 00745 } 00746 } 00747 break; 00748 00749 case T_STRING: 00750 w_uclass(obj, rb_cString, arg); 00751 w_byte(TYPE_STRING, arg); 00752 w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg); 00753 break; 00754 00755 case T_REGEXP: 00756 w_uclass(obj, rb_cRegexp, arg); 00757 w_byte(TYPE_REGEXP, arg); 00758 { 00759 int opts = rb_reg_options(obj); 00760 w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg); 00761 w_byte((char)opts, arg); 00762 } 00763 break; 00764 00765 case T_ARRAY: 00766 w_uclass(obj, rb_cArray, arg); 00767 w_byte(TYPE_ARRAY, arg); 00768 { 00769 long i, len = RARRAY_LEN(obj); 00770 00771 w_long(len, arg); 00772 for (i=0; i<RARRAY_LEN(obj); i++) { 00773 w_object(RARRAY_PTR(obj)[i], arg, limit); 00774 if (len != RARRAY_LEN(obj)) { 00775 rb_raise(rb_eRuntimeError, "array modified during dump"); 00776 } 00777 } 00778 } 00779 
break; 00780 00781 case T_HASH: 00782 w_uclass(obj, rb_cHash, arg); 00783 if (NIL_P(RHASH_IFNONE(obj))) { 00784 w_byte(TYPE_HASH, arg); 00785 } 00786 else if (FL_TEST(obj, FL_USER2)) { 00787 /* FL_USER2 means HASH_PROC_DEFAULT (see hash.c) */ 00788 rb_raise(rb_eTypeError, "can't dump hash with default proc"); 00789 } 00790 else { 00791 w_byte(TYPE_HASH_DEF, arg); 00792 } 00793 w_long(RHASH_SIZE(obj), arg); 00794 rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg); 00795 if (!NIL_P(RHASH_IFNONE(obj))) { 00796 w_object(RHASH_IFNONE(obj), arg, limit); 00797 } 00798 break; 00799 00800 case T_STRUCT: 00801 w_class(TYPE_STRUCT, obj, arg, TRUE); 00802 { 00803 long len = RSTRUCT_LEN(obj); 00804 VALUE mem; 00805 long i; 00806 00807 w_long(len, arg); 00808 mem = rb_struct_members(obj); 00809 for (i=0; i<len; i++) { 00810 w_symbol(SYM2ID(RARRAY_PTR(mem)[i]), arg); 00811 w_object(RSTRUCT_PTR(obj)[i], arg, limit); 00812 } 00813 } 00814 break; 00815 00816 case T_OBJECT: 00817 w_class(TYPE_OBJECT, obj, arg, TRUE); 00818 w_objivar(obj, &c_arg); 00819 break; 00820 00821 case T_DATA: 00822 { 00823 VALUE v; 00824 00825 if (!rb_respond_to(obj, s_dump_data)) { 00826 rb_raise(rb_eTypeError, 00827 "no _dump_data is defined for class %s", 00828 rb_obj_classname(obj)); 00829 } 00830 v = rb_funcall(obj, s_dump_data, 0); 00831 check_dump_arg(arg, s_dump_data); 00832 w_class(TYPE_DATA, obj, arg, TRUE); 00833 w_object(v, arg, limit); 00834 } 00835 break; 00836 00837 default: 00838 rb_raise(rb_eTypeError, "can't dump %s", 00839 rb_obj_classname(obj)); 00840 break; 00841 } 00842 } 00843 if (hasiv) { 00844 w_ivar(obj, ivtbl, &c_arg); 00845 } 00846 } 00847 00848 static void 00849 clear_dump_arg(struct dump_arg *arg) 00850 { 00851 if (!arg->symbols) return; 00852 st_free_table(arg->symbols); 00853 arg->symbols = 0; 00854 st_free_table(arg->data); 00855 arg->data = 0; 00856 st_free_table(arg->compat_tbl); 00857 arg->compat_tbl = 0; 00858 if (arg->encodings) { 00859 st_free_table(arg->encodings); 
00860 arg->encodings = 0; 00861 } 00862 } 00863 00864 /* 00865 * call-seq: 00866 * dump( obj [, anIO] , limit=-1 ) -> anIO 00867 * 00868 * Serializes obj and all descendant objects. If anIO is 00869 * specified, the serialized data will be written to it, otherwise the 00870 * data will be returned as a String. If limit is specified, the 00871 * traversal of subobjects will be limited to that depth. If limit is 00872 * negative, no checking of depth will be performed. 00873 * 00874 * class Klass 00875 * def initialize(str) 00876 * @str = str 00877 * end 00878 * def say_hello 00879 * @str 00880 * end 00881 * end 00882 * 00883 * (produces no output) 00884 * 00885 * o = Klass.new("hello\n") 00886 * data = Marshal.dump(o) 00887 * obj = Marshal.load(data) 00888 * obj.say_hello #=> "hello\n" 00889 * 00890 * Marshal can't dump following objects: 00891 * * anonymous Class/Module. 00892 * * objects which related to its system (ex: Dir, File::Stat, IO, File, Socket 00893 * and so on) 00894 * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread, 00895 * ThreadGroup, Continuation 00896 * * objects which defines singleton methods 00897 */ 00898 static VALUE 00899 marshal_dump(int argc, VALUE *argv) 00900 { 00901 VALUE obj, port, a1, a2; 00902 int limit = -1; 00903 struct dump_arg *arg; 00904 volatile VALUE wrapper; 00905 00906 port = Qnil; 00907 rb_scan_args(argc, argv, "12", &obj, &a1, &a2); 00908 if (argc == 3) { 00909 if (!NIL_P(a2)) limit = NUM2INT(a2); 00910 if (NIL_P(a1)) goto type_error; 00911 port = a1; 00912 } 00913 else if (argc == 2) { 00914 if (FIXNUM_P(a1)) limit = FIX2INT(a1); 00915 else if (NIL_P(a1)) goto type_error; 00916 else port = a1; 00917 } 00918 wrapper = TypedData_Make_Struct(rb_cData, struct dump_arg, &dump_arg_data, arg); 00919 arg->dest = 0; 00920 arg->symbols = st_init_numtable(); 00921 arg->data = st_init_numtable(); 00922 arg->infection = 0; 00923 arg->compat_tbl = st_init_numtable(); 00924 arg->encodings = 0; 00925 arg->str = 
rb_str_buf_new(0); 00926 if (!NIL_P(port)) { 00927 if (!rb_respond_to(port, s_write)) { 00928 type_error: 00929 rb_raise(rb_eTypeError, "instance of IO needed"); 00930 } 00931 arg->dest = port; 00932 if (rb_respond_to(port, s_binmode)) { 00933 rb_funcall2(port, s_binmode, 0, 0); 00934 check_dump_arg(arg, s_binmode); 00935 } 00936 } 00937 else { 00938 port = arg->str; 00939 } 00940 00941 w_byte(MARSHAL_MAJOR, arg); 00942 w_byte(MARSHAL_MINOR, arg); 00943 00944 w_object(obj, arg, limit); 00945 if (arg->dest) { 00946 rb_io_write(arg->dest, arg->str); 00947 rb_str_resize(arg->str, 0); 00948 } 00949 clear_dump_arg(arg); 00950 RB_GC_GUARD(wrapper); 00951 00952 return port; 00953 } 00954 00955 struct load_arg { 00956 VALUE src; 00957 long offset; 00958 st_table *symbols; 00959 st_table *data; 00960 VALUE proc; 00961 st_table *compat_tbl; 00962 int infection; 00963 }; 00964 00965 static void 00966 check_load_arg(struct load_arg *arg, ID sym) 00967 { 00968 if (!arg->symbols) { 00969 rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s", 00970 rb_id2name(sym)); 00971 } 00972 } 00973 00974 static void clear_load_arg(struct load_arg *arg); 00975 00976 static void 00977 mark_load_arg(void *ptr) 00978 { 00979 struct load_arg *p = ptr; 00980 if (!p->symbols) 00981 return; 00982 rb_mark_tbl(p->data); 00983 rb_mark_hash(p->compat_tbl); 00984 } 00985 00986 static void 00987 free_load_arg(void *ptr) 00988 { 00989 clear_load_arg(ptr); 00990 xfree(ptr); 00991 } 00992 00993 static size_t 00994 memsize_load_arg(const void *ptr) 00995 { 00996 return ptr ? 
sizeof(struct load_arg) : 0; 00997 } 00998 00999 static const rb_data_type_t load_arg_data = { 01000 "load_arg", 01001 {mark_load_arg, free_load_arg, memsize_load_arg,}, 01002 }; 01003 01004 #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg)) 01005 static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg); 01006 static VALUE r_object(struct load_arg *arg); 01007 static ID r_symbol(struct load_arg *arg); 01008 static VALUE path2class(VALUE path); 01009 01010 static st_index_t 01011 r_prepare(struct load_arg *arg) 01012 { 01013 st_index_t idx = arg->data->num_entries; 01014 01015 st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef); 01016 return idx; 01017 } 01018 01019 static int 01020 r_byte(struct load_arg *arg) 01021 { 01022 int c; 01023 01024 if (TYPE(arg->src) == T_STRING) { 01025 if (RSTRING_LEN(arg->src) > arg->offset) { 01026 c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++]; 01027 } 01028 else { 01029 rb_raise(rb_eArgError, "marshal data too short"); 01030 } 01031 } 01032 else { 01033 VALUE src = arg->src; 01034 VALUE v = rb_funcall2(src, s_getbyte, 0, 0); 01035 check_load_arg(arg, s_getbyte); 01036 if (NIL_P(v)) rb_eof_error(); 01037 c = (unsigned char)NUM2CHR(v); 01038 } 01039 return c; 01040 } 01041 01042 static void 01043 long_toobig(int size) 01044 { 01045 rb_raise(rb_eTypeError, "long too big for this architecture (size " 01046 STRINGIZE(SIZEOF_LONG)", given %d)", size); 01047 } 01048 01049 #undef SIGN_EXTEND_CHAR 01050 #if __STDC__ 01051 # define SIGN_EXTEND_CHAR(c) ((signed char)(c)) 01052 #else /* not __STDC__ */ 01053 /* As in Harbison and Steele. 
*/ 01054 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) 01055 #endif 01056 01057 static long 01058 r_long(struct load_arg *arg) 01059 { 01060 register long x; 01061 int c = SIGN_EXTEND_CHAR(r_byte(arg)); 01062 long i; 01063 01064 if (c == 0) return 0; 01065 if (c > 0) { 01066 if (4 < c && c < 128) { 01067 return c - 5; 01068 } 01069 if (c > (int)sizeof(long)) long_toobig(c); 01070 x = 0; 01071 for (i=0;i<c;i++) { 01072 x |= (long)r_byte(arg) << (8*i); 01073 } 01074 } 01075 else { 01076 if (-129 < c && c < -4) { 01077 return c + 5; 01078 } 01079 c = -c; 01080 if (c > (int)sizeof(long)) long_toobig(c); 01081 x = -1; 01082 for (i=0;i<c;i++) { 01083 x &= ~((long)0xff << (8*i)); 01084 x |= (long)r_byte(arg) << (8*i); 01085 } 01086 } 01087 return x; 01088 } 01089 01090 #define r_bytes(arg) r_bytes0(r_long(arg), (arg)) 01091 01092 static VALUE 01093 r_bytes0(long len, struct load_arg *arg) 01094 { 01095 VALUE str; 01096 01097 if (len == 0) return rb_str_new(0, 0); 01098 if (TYPE(arg->src) == T_STRING) { 01099 if (RSTRING_LEN(arg->src) - arg->offset >= len) { 01100 str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len); 01101 arg->offset += len; 01102 } 01103 else { 01104 too_short: 01105 rb_raise(rb_eArgError, "marshal data too short"); 01106 } 01107 } 01108 else { 01109 VALUE src = arg->src; 01110 VALUE n = LONG2NUM(len); 01111 str = rb_funcall2(src, s_read, 1, &n); 01112 check_load_arg(arg, s_read); 01113 if (NIL_P(str)) goto too_short; 01114 StringValue(str); 01115 if (RSTRING_LEN(str) != len) goto too_short; 01116 arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION); 01117 } 01118 return str; 01119 } 01120 01121 static int 01122 id2encidx(ID id, VALUE val) 01123 { 01124 if (id == rb_id_encoding()) { 01125 int idx = rb_enc_find_index(StringValueCStr(val)); 01126 return idx; 01127 } 01128 else if (id == rb_intern("E")) { 01129 if (val == Qfalse) return rb_usascii_encindex(); 01130 else if (val == Qtrue) return rb_utf8_encindex(); 01131 /* bogus 
ignore */ 01132 } 01133 return -1; 01134 } 01135 01136 static ID 01137 r_symlink(struct load_arg *arg) 01138 { 01139 st_data_t id; 01140 long num = r_long(arg); 01141 01142 if (st_lookup(arg->symbols, num, &id)) { 01143 return (ID)id; 01144 } 01145 rb_raise(rb_eArgError, "bad symbol"); 01146 } 01147 01148 static ID 01149 r_symreal(struct load_arg *arg, int ivar) 01150 { 01151 volatile VALUE s = r_bytes(arg); 01152 ID id; 01153 int idx = -1; 01154 st_index_t n = arg->symbols->num_entries; 01155 01156 st_insert(arg->symbols, (st_data_t)n, (st_data_t)0); 01157 if (ivar) { 01158 long num = r_long(arg); 01159 while (num-- > 0) { 01160 id = r_symbol(arg); 01161 idx = id2encidx(id, r_object(arg)); 01162 } 01163 } 01164 if (idx < 0) idx = rb_usascii_encindex(); 01165 rb_enc_associate_index(s, idx); 01166 id = rb_intern_str(s); 01167 st_insert(arg->symbols, (st_data_t)n, (st_data_t)id); 01168 01169 return id; 01170 } 01171 01172 static ID 01173 r_symbol(struct load_arg *arg) 01174 { 01175 int type, ivar = 0; 01176 01177 again: 01178 switch ((type = r_byte(arg))) { 01179 case TYPE_IVAR: 01180 ivar = 1; 01181 goto again; 01182 case TYPE_SYMBOL: 01183 return r_symreal(arg, ivar); 01184 case TYPE_SYMLINK: 01185 if (ivar) { 01186 rb_raise(rb_eArgError, "dump format error (symlink with encoding)"); 01187 } 01188 return r_symlink(arg); 01189 default: 01190 rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type); 01191 break; 01192 } 01193 } 01194 01195 static VALUE 01196 r_unique(struct load_arg *arg) 01197 { 01198 return rb_id2str(r_symbol(arg)); 01199 } 01200 01201 static VALUE 01202 r_string(struct load_arg *arg) 01203 { 01204 return r_bytes(arg); 01205 } 01206 01207 static VALUE 01208 r_entry0(VALUE v, st_index_t num, struct load_arg *arg) 01209 { 01210 st_data_t real_obj = (VALUE)Qundef; 01211 if (st_lookup(arg->compat_tbl, v, &real_obj)) { 01212 st_insert(arg->data, num, (st_data_t)real_obj); 01213 } 01214 else { 01215 st_insert(arg->data, num, (st_data_t)v); 
01216 } 01217 if (arg->infection && 01218 TYPE(v) != T_CLASS && TYPE(v) != T_MODULE) { 01219 FL_SET(v, arg->infection); 01220 if ((VALUE)real_obj != Qundef) 01221 FL_SET((VALUE)real_obj, arg->infection); 01222 } 01223 return v; 01224 } 01225 01226 static VALUE 01227 r_leave(VALUE v, struct load_arg *arg) 01228 { 01229 st_data_t data; 01230 if (st_lookup(arg->compat_tbl, v, &data)) { 01231 VALUE real_obj = (VALUE)data; 01232 rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj)); 01233 st_data_t key = v; 01234 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) { 01235 marshal_compat_t *compat = (marshal_compat_t*)data; 01236 compat->loader(real_obj, v); 01237 } 01238 st_delete(arg->compat_tbl, &key, 0); 01239 v = real_obj; 01240 } 01241 if (arg->proc) { 01242 v = rb_funcall(arg->proc, s_call, 1, v); 01243 check_load_arg(arg, s_call); 01244 } 01245 return v; 01246 } 01247 01248 static void 01249 r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg) 01250 { 01251 long len; 01252 01253 len = r_long(arg); 01254 if (len > 0) { 01255 do { 01256 ID id = r_symbol(arg); 01257 VALUE val = r_object(arg); 01258 int idx = id2encidx(id, val); 01259 if (idx >= 0) { 01260 rb_enc_associate_index(obj, idx); 01261 if (has_encoding) *has_encoding = TRUE; 01262 } 01263 else { 01264 rb_ivar_set(obj, id, val); 01265 } 01266 } while (--len > 0); 01267 } 01268 } 01269 01270 static VALUE 01271 path2class(VALUE path) 01272 { 01273 VALUE v = rb_path_to_class(path); 01274 01275 if (TYPE(v) != T_CLASS) { 01276 rb_raise(rb_eArgError, "%.*s does not refer to class", 01277 (int)RSTRING_LEN(path), RSTRING_PTR(path)); 01278 } 01279 return v; 01280 } 01281 01282 static VALUE 01283 path2module(VALUE path) 01284 { 01285 VALUE v = rb_path_to_class(path); 01286 01287 if (TYPE(v) != T_MODULE) { 01288 rb_raise(rb_eArgError, "%.*s does not refer to module", 01289 (int)RSTRING_LEN(path), RSTRING_PTR(path)); 01290 } 01291 return v; 01292 } 01293 01294 static VALUE 01295 
obj_alloc_by_path(VALUE path, struct load_arg *arg) 01296 { 01297 VALUE klass; 01298 st_data_t data; 01299 rb_alloc_func_t allocator; 01300 01301 klass = path2class(path); 01302 01303 allocator = rb_get_alloc_func(klass); 01304 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) { 01305 marshal_compat_t *compat = (marshal_compat_t*)data; 01306 VALUE real_obj = rb_obj_alloc(klass); 01307 VALUE obj = rb_obj_alloc(compat->oldclass); 01308 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj); 01309 return obj; 01310 } 01311 01312 return rb_obj_alloc(klass); 01313 } 01314 01315 static VALUE 01316 r_object0(struct load_arg *arg, int *ivp, VALUE extmod) 01317 { 01318 VALUE v = Qnil; 01319 int type = r_byte(arg); 01320 long id; 01321 st_data_t link; 01322 01323 switch (type) { 01324 case TYPE_LINK: 01325 id = r_long(arg); 01326 if (!st_lookup(arg->data, (st_data_t)id, &link)) { 01327 rb_raise(rb_eArgError, "dump format error (unlinked)"); 01328 } 01329 v = (VALUE)link; 01330 if (arg->proc) { 01331 v = rb_funcall(arg->proc, s_call, 1, v); 01332 check_load_arg(arg, s_call); 01333 } 01334 break; 01335 01336 case TYPE_IVAR: 01337 { 01338 int ivar = TRUE; 01339 01340 v = r_object0(arg, &ivar, extmod); 01341 if (ivar) r_ivar(v, NULL, arg); 01342 } 01343 break; 01344 01345 case TYPE_EXTENDED: 01346 { 01347 VALUE m = path2module(r_unique(arg)); 01348 01349 if (NIL_P(extmod)) extmod = rb_ary_new2(0); 01350 rb_ary_push(extmod, m); 01351 01352 v = r_object0(arg, 0, extmod); 01353 while (RARRAY_LEN(extmod) > 0) { 01354 m = rb_ary_pop(extmod); 01355 rb_extend_object(v, m); 01356 } 01357 } 01358 break; 01359 01360 case TYPE_UCLASS: 01361 { 01362 VALUE c = path2class(r_unique(arg)); 01363 01364 if (FL_TEST(c, FL_SINGLETON)) { 01365 rb_raise(rb_eTypeError, "singleton can't be loaded"); 01366 } 01367 v = r_object0(arg, 0, extmod); 01368 if (rb_special_const_p(v) || TYPE(v) == T_OBJECT || TYPE(v) == T_CLASS) { 01369 format_error: 01370 rb_raise(rb_eArgError, "dump 
format error (user class)"); 01371 } 01372 if (TYPE(v) == T_MODULE || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) { 01373 VALUE tmp = rb_obj_alloc(c); 01374 01375 if (TYPE(v) != TYPE(tmp)) goto format_error; 01376 } 01377 RBASIC(v)->klass = c; 01378 } 01379 break; 01380 01381 case TYPE_NIL: 01382 v = Qnil; 01383 v = r_leave(v, arg); 01384 break; 01385 01386 case TYPE_TRUE: 01387 v = Qtrue; 01388 v = r_leave(v, arg); 01389 break; 01390 01391 case TYPE_FALSE: 01392 v = Qfalse; 01393 v = r_leave(v, arg); 01394 break; 01395 01396 case TYPE_FIXNUM: 01397 { 01398 long i = r_long(arg); 01399 v = LONG2FIX(i); 01400 } 01401 v = r_leave(v, arg); 01402 break; 01403 01404 case TYPE_FLOAT: 01405 { 01406 double d; 01407 VALUE str = r_bytes(arg); 01408 const char *ptr = RSTRING_PTR(str); 01409 01410 if (strcmp(ptr, "nan") == 0) { 01411 d = NAN; 01412 } 01413 else if (strcmp(ptr, "inf") == 0) { 01414 d = INFINITY; 01415 } 01416 else if (strcmp(ptr, "-inf") == 0) { 01417 d = -INFINITY; 01418 } 01419 else { 01420 char *e; 01421 d = strtod(ptr, &e); 01422 d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr)); 01423 } 01424 v = DBL2NUM(d); 01425 v = r_entry(v, arg); 01426 v = r_leave(v, arg); 01427 } 01428 break; 01429 01430 case TYPE_BIGNUM: 01431 { 01432 long len; 01433 BDIGIT *digits; 01434 volatile VALUE data; 01435 01436 NEWOBJ(big, struct RBignum); 01437 OBJSETUP(big, rb_cBignum, T_BIGNUM); 01438 RBIGNUM_SET_SIGN(big, (r_byte(arg) == '+')); 01439 len = r_long(arg); 01440 data = r_bytes0(len * 2, arg); 01441 #if SIZEOF_BDIGITS == SIZEOF_SHORT 01442 rb_big_resize((VALUE)big, len); 01443 #else 01444 rb_big_resize((VALUE)big, (len + 1) * 2 / sizeof(BDIGIT)); 01445 #endif 01446 digits = RBIGNUM_DIGITS(big); 01447 MEMCPY(digits, RSTRING_PTR(data), char, len * 2); 01448 #if SIZEOF_BDIGITS > SIZEOF_SHORT 01449 MEMZERO((char *)digits + len * 2, char, 01450 RBIGNUM_LEN(big) * sizeof(BDIGIT) - len * 2); 01451 #endif 01452 len = RBIGNUM_LEN(big); 01453 while (len > 0) { 01454 
unsigned char *p = (unsigned char *)digits; 01455 BDIGIT num = 0; 01456 #if SIZEOF_BDIGITS > SIZEOF_SHORT 01457 int shift = 0; 01458 int i; 01459 01460 for (i=0; i<SIZEOF_BDIGITS; i++) { 01461 num |= (int)p[i] << shift; 01462 shift += 8; 01463 } 01464 #else 01465 num = p[0] | (p[1] << 8); 01466 #endif 01467 *digits++ = num; 01468 len--; 01469 } 01470 v = rb_big_norm((VALUE)big); 01471 v = r_entry(v, arg); 01472 v = r_leave(v, arg); 01473 } 01474 break; 01475 01476 case TYPE_STRING: 01477 v = r_entry(r_string(arg), arg); 01478 v = r_leave(v, arg); 01479 break; 01480 01481 case TYPE_REGEXP: 01482 { 01483 volatile VALUE str = r_bytes(arg); 01484 int options = r_byte(arg); 01485 int has_encoding = FALSE; 01486 st_index_t idx = r_prepare(arg); 01487 01488 if (ivp) { 01489 r_ivar(str, &has_encoding, arg); 01490 *ivp = FALSE; 01491 } 01492 if (!has_encoding) { 01493 /* 1.8 compatibility; remove escapes undefined in 1.8 */ 01494 char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr; 01495 long len = RSTRING_LEN(str); 01496 long bs = 0; 01497 for (; len-- > 0; *dst++ = *src++) { 01498 switch (*src) { 01499 case '\\': bs++; break; 01500 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 01501 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y': 01502 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K': 01503 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R': 01504 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y': 01505 if (bs & 1) --dst; 01506 default: bs = 0; break; 01507 } 01508 } 01509 rb_str_set_len(str, dst - ptr); 01510 } 01511 v = r_entry0(rb_reg_new_str(str, options), idx, arg); 01512 v = r_leave(v, arg); 01513 } 01514 break; 01515 01516 case TYPE_ARRAY: 01517 { 01518 volatile long len = r_long(arg); /* gcc 2.7.2.3 -O2 bug?? 
*/ 01519 01520 v = rb_ary_new2(len); 01521 v = r_entry(v, arg); 01522 while (len--) { 01523 rb_ary_push(v, r_object(arg)); 01524 } 01525 v = r_leave(v, arg); 01526 } 01527 break; 01528 01529 case TYPE_HASH: 01530 case TYPE_HASH_DEF: 01531 { 01532 long len = r_long(arg); 01533 01534 v = rb_hash_new(); 01535 v = r_entry(v, arg); 01536 while (len--) { 01537 VALUE key = r_object(arg); 01538 VALUE value = r_object(arg); 01539 rb_hash_aset(v, key, value); 01540 } 01541 if (type == TYPE_HASH_DEF) { 01542 RHASH_IFNONE(v) = r_object(arg); 01543 } 01544 v = r_leave(v, arg); 01545 } 01546 break; 01547 01548 case TYPE_STRUCT: 01549 { 01550 VALUE mem, values; 01551 volatile long i; /* gcc 2.7.2.3 -O2 bug?? */ 01552 ID slot; 01553 st_index_t idx = r_prepare(arg); 01554 VALUE klass = path2class(r_unique(arg)); 01555 long len = r_long(arg); 01556 01557 v = rb_obj_alloc(klass); 01558 if (TYPE(v) != T_STRUCT) { 01559 rb_raise(rb_eTypeError, "class %s not a struct", rb_class2name(klass)); 01560 } 01561 mem = rb_struct_s_members(klass); 01562 if (RARRAY_LEN(mem) != len) { 01563 rb_raise(rb_eTypeError, "struct %s not compatible (struct size differs)", 01564 rb_class2name(klass)); 01565 } 01566 01567 v = r_entry0(v, idx, arg); 01568 values = rb_ary_new2(len); 01569 for (i=0; i<len; i++) { 01570 slot = r_symbol(arg); 01571 01572 if (RARRAY_PTR(mem)[i] != ID2SYM(slot)) { 01573 rb_raise(rb_eTypeError, "struct %s not compatible (:%s for :%s)", 01574 rb_class2name(klass), 01575 rb_id2name(slot), 01576 rb_id2name(SYM2ID(RARRAY_PTR(mem)[i]))); 01577 } 01578 rb_ary_push(values, r_object(arg)); 01579 } 01580 rb_struct_initialize(v, values); 01581 v = r_leave(v, arg); 01582 } 01583 break; 01584 01585 case TYPE_USERDEF: 01586 { 01587 VALUE klass = path2class(r_unique(arg)); 01588 VALUE data; 01589 01590 if (!rb_respond_to(klass, s_load)) { 01591 rb_raise(rb_eTypeError, "class %s needs to have method `_load'", 01592 rb_class2name(klass)); 01593 } 01594 data = r_string(arg); 01595 if (ivp) { 01596 
r_ivar(data, NULL, arg); 01597 *ivp = FALSE; 01598 } 01599 v = rb_funcall(klass, s_load, 1, data); 01600 check_load_arg(arg, s_load); 01601 v = r_entry(v, arg); 01602 v = r_leave(v, arg); 01603 } 01604 break; 01605 01606 case TYPE_USRMARSHAL: 01607 { 01608 VALUE klass = path2class(r_unique(arg)); 01609 VALUE data; 01610 01611 v = rb_obj_alloc(klass); 01612 if (!NIL_P(extmod)) { 01613 while (RARRAY_LEN(extmod) > 0) { 01614 VALUE m = rb_ary_pop(extmod); 01615 rb_extend_object(v, m); 01616 } 01617 } 01618 if (!rb_respond_to(v, s_mload)) { 01619 rb_raise(rb_eTypeError, "instance of %s needs to have method `marshal_load'", 01620 rb_class2name(klass)); 01621 } 01622 v = r_entry(v, arg); 01623 data = r_object(arg); 01624 rb_funcall(v, s_mload, 1, data); 01625 check_load_arg(arg, s_mload); 01626 v = r_leave(v, arg); 01627 } 01628 break; 01629 01630 case TYPE_OBJECT: 01631 { 01632 st_index_t idx = r_prepare(arg); 01633 v = obj_alloc_by_path(r_unique(arg), arg); 01634 if (TYPE(v) != T_OBJECT) { 01635 rb_raise(rb_eArgError, "dump format error"); 01636 } 01637 v = r_entry0(v, idx, arg); 01638 r_ivar(v, NULL, arg); 01639 v = r_leave(v, arg); 01640 } 01641 break; 01642 01643 case TYPE_DATA: 01644 { 01645 VALUE klass = path2class(r_unique(arg)); 01646 if (rb_respond_to(klass, s_alloc)) { 01647 static int warn = TRUE; 01648 if (warn) { 01649 rb_warn("define `allocate' instead of `_alloc'"); 01650 warn = FALSE; 01651 } 01652 v = rb_funcall(klass, s_alloc, 0); 01653 check_load_arg(arg, s_alloc); 01654 } 01655 else { 01656 v = rb_obj_alloc(klass); 01657 } 01658 if (TYPE(v) != T_DATA) { 01659 rb_raise(rb_eArgError, "dump format error"); 01660 } 01661 v = r_entry(v, arg); 01662 if (!rb_respond_to(v, s_load_data)) { 01663 rb_raise(rb_eTypeError, 01664 "class %s needs to have instance method `_load_data'", 01665 rb_class2name(klass)); 01666 } 01667 rb_funcall(v, s_load_data, 1, r_object0(arg, 0, extmod)); 01668 check_load_arg(arg, s_load_data); 01669 v = r_leave(v, arg); 01670 } 01671 
break; 01672 01673 case TYPE_MODULE_OLD: 01674 { 01675 volatile VALUE str = r_bytes(arg); 01676 01677 v = rb_path_to_class(str); 01678 v = r_entry(v, arg); 01679 v = r_leave(v, arg); 01680 } 01681 break; 01682 01683 case TYPE_CLASS: 01684 { 01685 volatile VALUE str = r_bytes(arg); 01686 01687 v = path2class(str); 01688 v = r_entry(v, arg); 01689 v = r_leave(v, arg); 01690 } 01691 break; 01692 01693 case TYPE_MODULE: 01694 { 01695 volatile VALUE str = r_bytes(arg); 01696 01697 v = path2module(str); 01698 v = r_entry(v, arg); 01699 v = r_leave(v, arg); 01700 } 01701 break; 01702 01703 case TYPE_SYMBOL: 01704 if (ivp) { 01705 v = ID2SYM(r_symreal(arg, *ivp)); 01706 *ivp = FALSE; 01707 } 01708 else { 01709 v = ID2SYM(r_symreal(arg, 0)); 01710 } 01711 v = r_leave(v, arg); 01712 break; 01713 01714 case TYPE_SYMLINK: 01715 v = ID2SYM(r_symlink(arg)); 01716 break; 01717 01718 default: 01719 rb_raise(rb_eArgError, "dump format error(0x%x)", type); 01720 break; 01721 } 01722 return v; 01723 } 01724 01725 static VALUE 01726 r_object(struct load_arg *arg) 01727 { 01728 return r_object0(arg, 0, Qnil); 01729 } 01730 01731 static void 01732 clear_load_arg(struct load_arg *arg) 01733 { 01734 if (!arg->symbols) return; 01735 st_free_table(arg->symbols); 01736 arg->symbols = 0; 01737 st_free_table(arg->data); 01738 arg->data = 0; 01739 st_free_table(arg->compat_tbl); 01740 arg->compat_tbl = 0; 01741 } 01742 01743 /* 01744 * call-seq: 01745 * load( source [, proc] ) -> obj 01746 * restore( source [, proc] ) -> obj 01747 * 01748 * Returns the result of converting the serialized data in source into a 01749 * Ruby object (possibly with associated subordinate objects). source 01750 * may be either an instance of IO or an object that responds to 01751 * to_str. If proc is specified, it will be passed each object as it 01752 * is deserialized. 
01753 */ 01754 static VALUE 01755 marshal_load(int argc, VALUE *argv) 01756 { 01757 VALUE port, proc; 01758 int major, minor, infection = 0; 01759 VALUE v; 01760 volatile VALUE wrapper; 01761 struct load_arg *arg; 01762 01763 rb_scan_args(argc, argv, "11", &port, &proc); 01764 v = rb_check_string_type(port); 01765 if (!NIL_P(v)) { 01766 infection = (int)FL_TEST(port, MARSHAL_INFECTION); /* original taintedness */ 01767 port = v; 01768 } 01769 else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) { 01770 if (rb_respond_to(port, s_binmode)) { 01771 rb_funcall2(port, s_binmode, 0, 0); 01772 } 01773 infection = (int)(FL_TAINT | FL_TEST(port, FL_UNTRUSTED)); 01774 } 01775 else { 01776 rb_raise(rb_eTypeError, "instance of IO needed"); 01777 } 01778 wrapper = TypedData_Make_Struct(rb_cData, struct load_arg, &load_arg_data, arg); 01779 arg->infection = infection; 01780 arg->src = port; 01781 arg->offset = 0; 01782 arg->symbols = st_init_numtable(); 01783 arg->data = st_init_numtable(); 01784 arg->compat_tbl = st_init_numtable(); 01785 arg->proc = 0; 01786 01787 major = r_byte(arg); 01788 minor = r_byte(arg); 01789 if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) { 01790 clear_load_arg(arg); 01791 rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\ 01792 \tformat version %d.%d required; %d.%d given", 01793 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor); 01794 } 01795 if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) { 01796 rb_warn("incompatible marshal file format (can be read)\n\ 01797 \tformat version %d.%d required; %d.%d given", 01798 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor); 01799 } 01800 01801 if (!NIL_P(proc)) arg->proc = proc; 01802 v = r_object(arg); 01803 clear_load_arg(arg); 01804 RB_GC_GUARD(wrapper); 01805 01806 return v; 01807 } 01808 01809 /* 01810 * The marshaling library converts collections of Ruby objects into a 01811 * byte stream, allowing them to be stored outside the currently 01812 * active script. 
This data may subsequently be read and the original 01813 * objects reconstituted. 01814 * 01815 * Marshaled data has major and minor version numbers stored along 01816 * with the object information. In normal use, marshaling can only 01817 * load data written with the same major version number and an equal 01818 * or lower minor version number. If Ruby's ``verbose'' flag is set 01819 * (normally using -d, -v, -w, or --verbose) a warning is issued 01820 * when the minor numbers do not match exactly. Marshal versioning is independent of 01821 * Ruby's version numbers. You can extract the version by reading the 01822 * first two bytes of marshaled data. 01823 * 01824 * str = Marshal.dump("thing") 01825 * RUBY_VERSION #=> "1.9.0" 01826 * str[0].ord #=> 4 01827 * str[1].ord #=> 8 01828 * 01829 * Some objects cannot be dumped: if the objects to be dumped include 01830 * bindings, procedure or method objects, instances of class IO, or 01831 * singleton objects, a TypeError will be raised. 01832 * 01833 * If your class has special serialization needs (for example, if you 01834 * want to serialize in some specific format), or if it contains 01835 * objects that would otherwise not be serializable, you can implement 01836 * your own serialization strategy. 01837 * 01838 * There are two methods of doing this: your object can define either 01839 * marshal_dump and marshal_load or _dump and _load. marshal_dump will take 01840 * precedence over _dump if both are defined. marshal_dump may result in 01841 * smaller Marshal strings. 01842 * 01843 * == marshal_dump and marshal_load 01844 * 01845 * When dumping an object the method marshal_dump will be called. 01846 * marshal_dump must return a result containing the information necessary for 01847 * marshal_load to reconstitute the object. The result can be any object. 01848 * 01849 * When loading an object dumped using marshal_dump the object is first 01850 * allocated then marshal_load is called with the result from marshal_dump.
01851 * marshal_load must recreate the object from the information in the result. 01852 * 01853 * Example: 01854 * 01855 * class MyObj 01856 * def initialize name, version, data 01857 * @name = name 01858 * @version = version 01859 * @data = data 01860 * end 01861 * 01862 * def marshal_dump 01863 * [@name, @version] 01864 * end 01865 * 01866 * def marshal_load array 01867 * @name, @version = array 01868 * end 01869 * end 01870 * 01871 * == _dump and _load 01872 * 01873 * Use _dump and _load when you need to allocate the object you're restoring 01874 * yourself. 01875 * 01876 * When dumping an object the instance method _dump is called with an Integer 01877 * which indicates the maximum depth of objects to dump (a value of -1 implies 01878 * that you should disable depth checking). _dump must return a String 01879 * containing the information necessary to reconstitute the object. 01880 * 01881 * The class method _load should take a String and use it to return an object 01882 * of the same class. 01883 * 01884 * Example: 01885 * 01886 * class MyObj 01887 * def initialize name, version, data 01888 * @name = name 01889 * @version = version 01890 * @data = data 01891 * end 01892 * 01893 * def _dump level 01894 * [@name, @version].join ':' 01895 * end 01896 * 01897 * def self._load args 01898 * new(*args.split(':')) 01899 * end 01900 * end 01901 * 01902 * Since Marshal.dump outputs a string you can have _dump return a Marshal 01903 * string which is Marshal.loaded in _load for complex objects.
01904 */ 01905 void 01906 Init_marshal(void) 01907 { 01908 #undef rb_intern 01909 #define rb_intern(str) rb_intern_const(str) 01910 01911 VALUE rb_mMarshal = rb_define_module("Marshal"); 01912 01913 s_dump = rb_intern("_dump"); 01914 s_load = rb_intern("_load"); 01915 s_mdump = rb_intern("marshal_dump"); 01916 s_mload = rb_intern("marshal_load"); 01917 s_dump_data = rb_intern("_dump_data"); 01918 s_load_data = rb_intern("_load_data"); 01919 s_alloc = rb_intern("_alloc"); 01920 s_call = rb_intern("call"); 01921 s_getbyte = rb_intern("getbyte"); 01922 s_read = rb_intern("read"); 01923 s_write = rb_intern("write"); 01924 s_binmode = rb_intern("binmode"); 01925 01926 rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1); 01927 rb_define_module_function(rb_mMarshal, "load", marshal_load, -1); 01928 rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1); 01929 01930 rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR)); 01931 rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR)); 01932 01933 compat_allocator_tbl = st_init_numtable(); 01934 compat_allocator_tbl_wrapper = 01935 Data_Wrap_Struct(rb_cData, mark_marshal_compat_t, 0, compat_allocator_tbl); 01936 rb_gc_register_mark_object(compat_allocator_tbl_wrapper); 01937 } 01938 01939 VALUE 01940 rb_marshal_dump(VALUE obj, VALUE port) 01941 { 01942 int argc = 1; 01943 VALUE argv[2]; 01944 01945 argv[0] = obj; 01946 argv[1] = port; 01947 if (!NIL_P(port)) argc = 2; 01948 return marshal_dump(argc, argv); 01949 } 01950 01951 VALUE 01952 rb_marshal_load(VALUE port) 01953 { 01954 return marshal_load(1, &port); 01955 } 01956
1.7.6.1