Ruby C API에서 두 해시를 효율적으로 병합하는 방법은 무엇입니까?

https://stackoverflow.com/questions/1256975

12-09-2019
|

문제

저는 두 개의 해시를 병합해야 하는 Ruby용 C 확장을 작성하고 있는데, Ruby 1.8.6에서는 rb_hash_merge() 함수가 static으로 선언되어 있어 확장에서 직접 호출할 수 없습니다. 그래서 대신 다음과 같이 시도해 보았습니다.

rb_funcall(hash1, rb_intern("merge"), 1, hash2);

그러나 이 방법은 너무 느리고, 이 응용 프로그램에서는 성능이 매우 중요합니다.

효율성과 속도를 염두에 두고 이 병합을 수행하는 방법을 아는 분이 계십니까?

(참고: rb_hash_merge()의 소스를 보고 그대로 복제해 보려고도 했지만, 그 함수는 다른 static 함수들을 호출하고 그 함수들 역시 또 다른 static 함수들로 가득 차 있어서 분리해 내는 것이 거의 불가능해 보입니다. 다른 방법이 필요합니다.)

해결책

공개된 API 범위 안에서는 최적화할 여지가 없어 보입니다.

테스트 코드 :

#extconf.rb
# Build script: uses mkmf to generate the Makefile for the "hello" extension.
require 'mkmf'

extension_name = "hello"

# Registers the extension name with dir_config before the Makefile is written.
dir_config(extension_name)
create_makefile(extension_name)


// hello.c
#include "ruby.h"

/* Handle for the Ruby module `Hello`; assigned in Init_hello(). */
static VALUE rb_mHello;
/* Handle for the Ruby class `Hello::Calculator`; assigned in Init_hello(). */
static VALUE rb_cMyCalc;

/*
 * Mark callback handed to Data_Wrap_Struct() by calc_alloc().
 * Intentionally does nothing.
 */
static void
calc_mark(void *f)
{
}
/*
 * Free callback handed to Data_Wrap_Struct() by calc_alloc().
 * Intentionally does nothing.
 */
static void
calc_free(void *f)
{
}
/*
 * Allocation function for the class: wraps a NULL data pointer in an
 * object of `klass`, with calc_mark/calc_free as its mark and free callbacks.
 */
static VALUE
calc_alloc(VALUE klass)
{
  VALUE wrapped = Data_Wrap_Struct(klass, calc_mark, calc_free, NULL);

  return wrapped;
}

/*
 * Implementation of the zero-argument `initialize` method.
 * Performs no setup and returns nil.
 */
static VALUE
calc_init(VALUE obj)
{
  return Qnil;
}

/*
 * Baseline merge: dispatches the Ruby-level method `merge` on h1 with h2
 * as its single argument and returns whatever that call returns.
 * `obj` (the receiver) is not used.
 */
static VALUE
calc_merge(VALUE obj, VALUE h1, VALUE h2)
{
  VALUE merged = rb_funcall(h1, rb_intern("merge"), 1, h2);

  return merged;
}

static VALUE
calc_merge2(VALUE obj, VALUE h1, VALUE h2)
{
  VALUE h3 = rb_hash_new();
  VALUE keys;
  VALUE akey;
  keys = rb_funcall(h1, rb_intern("keys"), 0);
  while (akey = rb_each(keys)) {
    rb_hash_aset(h3, akey, rb_hash_aref(h1, akey));
  }
  keys = rb_funcall(h2, rb_intern("keys"), 0);
  while (akey = rb_each(keys)) {
    rb_hash_aset(h3, akey, rb_hash_aref(h2, akey));
  }
  return h3;
}

static VALUE
calc_merge3(VALUE obj, VALUE h1, VALUE h2)
{
  VALUE keys;
  VALUE akey;
  keys = rb_funcall(h1, rb_intern("keys"), 0);
  while (akey = rb_each(keys)) {
    rb_hash_aset(h2, akey, rb_hash_aref(h1, akey));
  }
  return h2;
}

void
Init_hello()
{
  rb_mHello = rb_define_module("Hello");
  rb_cMyCalc = rb_define_class_under(rb_mHello, "Calculator", rb_cObject);
  rb_define_alloc_func(rb_cMyCalc, calc_alloc);
  rb_define_method(rb_cMyCalc, "initialize", calc_init, 0);
  rb_define_method(rb_cMyCalc, "merge", calc_merge, 2);
  rb_define_method(rb_cMyCalc, "merge2", calc_merge, 2);
  rb_define_method(rb_cMyCalc, "merge3", calc_merge, 2);
}


# test.rb
# Benchmark driver: builds two 100,000-entry hashes with disjoint keys
# (Integer keys in h1, String keys in h2) and runs the merge variant
# selected by the first command-line argument, printing the key count.
require "hello"

h1 = {}
h2 = {}

(1..100000).each { |n| h1[n] = n + 1 }
(1..100000).each { |n| h2["#{n}-12"] = n + 1 }

calc = Hello::Calculator.new

# With no (or an unrecognised) argument nothing is merged, which gives the
# setup-only baseline timing.
case ARGV[0]
when "1" then puts calc.merge(h1, h2).keys.length
when "2" then puts calc.merge2(h1, h2).keys.length
when "3" then puts calc.merge3(h1, h2).keys.length
end

이제 테스트 결과 :

$ time ruby test.rb

real    0m1.021s
user    0m0.940s
sys     0m0.080s
$ time ruby test.rb 1
200000

real    0m1.224s
user    0m1.148s
sys     0m0.076s
$ time ruby test.rb 2
200000

real    0m1.219s
user    0m1.132s
sys     0m0.084s
$ time ruby test.rb 3
200000

real    0m1.220s
user    0m1.128s
sys     0m0.092s

따라서 약 0.2초가 걸리는 작업에서 줄일 수 있는 시간은 최대 0.004초 정도로 보입니다.

값을 설정하는 것 외에는 하는 일이 거의 없으므로, 더 최적화할 여지는 많지 않을 것입니다. Ruby 소스 자체를 수정해 볼 수도 있겠지만, 그 시점에서는 더 이상 "확장"을 개발하는 것이 아니라 언어 자체를 바꾸는 것이므로 현실적인 방법은 아닐 것입니다.

해시 병합이 C 코드 안에서 여러 번 수행해야 하는 작업이라면, 내부 데이터 구조를 사용하다가 마지막 단계에서만 Ruby 해시로 내보내는 것이 유일한 최적화 방법일 것입니다.

추신: 코드의 초기 골격은 이 훌륭한 튜토리얼에서 빌려 왔습니다.

라이선스: CC-BY-SA (저작자 표시 필요)

StackOverflow와 제휴 관계가 없습니다.