Come unire in modo efficiente due hash con l'API C di Ruby?

https://stackoverflow.com/questions/1256975

12-09-2019
|

Domanda

Sto scrivendo un'estensione C per Ruby che ha davvero bisogno di unire due hash; tuttavia la funzione rb_hash_merge() è statica in Ruby 1.8.6. Ho provato invece a usare:

rb_funcall(hash1, rb_intern("merge"), 1, hash2);

ma questo è troppo lento, e le prestazioni sono molto critiche in questa applicazione.

Qualcuno sa come eseguire questa unione tenendo conto di efficienza e velocità?

(Nota: ho provato semplicemente a guardare il sorgente di rb_hash_merge() e a replicarlo, ma è pieno di altre funzioni statiche, che a loro volta sono piene di ulteriori funzioni statiche, quindi sembra quasi impossibile da districare... ho bisogno di un altro modo.)

Soluzione

Ok, sembra che potrebbe non essere possibile ottimizzare restando all'interno dell'API pubblica.

Codice di prova:

# extconf.rb
# Build script: uses mkmf to generate the Makefile for the "hello" extension.
require "mkmf"

extension_name = "hello"

# Register the --with-hello-* configuration options, then write the Makefile.
dir_config(extension_name)
create_makefile(extension_name)


// hello.c
#include "ruby.h"

static VALUE rb_mHello;
static VALUE rb_cMyCalc;

/*
 * Mark function handed to Data_Wrap_Struct by calc_alloc.
 * Intentionally a no-op: the pointer is ignored.
 */
static void
calc_mark(void *f)
{
  (void)f; /* unused */
}
/*
 * Free function handed to Data_Wrap_Struct by calc_alloc.
 * Intentionally a no-op: the pointer is ignored and nothing is released.
 */
static void
calc_free(void *f)
{
  (void)f; /* unused */
}
/*
 * Allocation function for the Calculator class.
 *
 * Wraps a NULL data pointer in a Ruby object of class `klass`, using
 * calc_mark and calc_free as its mark/free functions.
 *
 * Returns the newly wrapped object.
 */
static VALUE
calc_alloc(VALUE klass)
{
  void *no_data = NULL; /* the object carries no native payload */

  return Data_Wrap_Struct(klass, calc_mark, calc_free, no_data);
}

/*
 * Implementation of Calculator#initialize.
 * Takes no Ruby-level arguments, does nothing, and returns nil.
 */
static VALUE
calc_init(VALUE obj)
{
  (void)obj; /* receiver is not used */
  return Qnil;
}

/*
 * Baseline merge: dispatches to the Ruby-level method `merge` on h1,
 * passing h2 as its single argument.
 *
 * obj - receiver (unused)
 * h1  - object that receives the `merge` call
 * h2  - argument passed to `merge`
 *
 * Returns whatever h1.merge(h2) returns.
 */
static VALUE
calc_merge(VALUE obj, VALUE h1, VALUE h2)
{
  ID merge_id = rb_intern("merge");
  VALUE merged = rb_funcall(h1, merge_id, 1, h2);

  return merged;
}

/*
 * Copies every key/value pair of hash `src` into hash `dest`.
 * Existing keys in `dest` are overwritten.
 */
static void
merge2_copy_entries(VALUE dest, VALUE src)
{
  VALUE keys;
  VALUE akey;
  long i;

  keys = rb_funcall(src, rb_intern("keys"), 0);
  Check_Type(keys, T_ARRAY); /* guard against a redefined #keys */

  /*
   * Walk the keys array by index. rb_each() cannot be used as a
   * "next element" iterator: it invokes `each` on the receiver instead
   * of returning successive elements.
   */
  for (i = 0; i < RARRAY_LEN(keys); i++) {
    akey = rb_ary_entry(keys, i);
    rb_hash_aset(dest, akey, rb_hash_aref(src, akey));
  }
}

/*
 * Non-destructive merge built only from public API calls.
 *
 * Creates a new hash, copies all entries of h1 into it, then all entries
 * of h2, so on duplicate keys the value from h2 wins. Neither h1 nor h2
 * is modified.
 *
 * obj - receiver (unused)
 * h1  - first hash
 * h2  - second hash; its values take precedence
 *
 * Returns the new merged hash.
 * Raises TypeError (via Check_Type) if h1 or h2 is not a Hash.
 */
static VALUE
calc_merge2(VALUE obj, VALUE h1, VALUE h2)
{
  VALUE h3;

  /* rb_hash_aref/rb_hash_aset assume real Hash objects. */
  Check_Type(h1, T_HASH);
  Check_Type(h2, T_HASH);

  h3 = rb_hash_new();
  merge2_copy_entries(h3, h1);
  merge2_copy_entries(h3, h2);
  return h3;
}

/*
 * Destructive merge built only from public API calls.
 *
 * Copies every entry of h1 into h2 and returns h2. h2 is modified in
 * place. On duplicate keys the value from h1 overwrites the one already
 * in h2.
 *
 * obj - receiver (unused)
 * h1  - hash whose entries are copied
 * h2  - hash that receives the entries (mutated)
 *
 * Returns h2.
 * Raises TypeError (via Check_Type) if h1 or h2 is not a Hash.
 */
static VALUE
calc_merge3(VALUE obj, VALUE h1, VALUE h2)
{
  VALUE keys;
  VALUE akey;
  long i;

  /* rb_hash_aref/rb_hash_aset assume real Hash objects. */
  Check_Type(h1, T_HASH);
  Check_Type(h2, T_HASH);

  keys = rb_funcall(h1, rb_intern("keys"), 0);
  Check_Type(keys, T_ARRAY); /* guard against a redefined #keys */

  /*
   * Walk the keys array by index. rb_each() cannot be used as a
   * "next element" iterator: it invokes `each` on the receiver instead
   * of returning successive elements.
   */
  for (i = 0; i < RARRAY_LEN(keys); i++) {
    akey = rb_ary_entry(keys, i);
    rb_hash_aset(h2, akey, rb_hash_aref(h1, akey));
  }
  return h2;
}

void
Init_hello()
{
  rb_mHello = rb_define_module("Hello");
  rb_cMyCalc = rb_define_class_under(rb_mHello, "Calculator", rb_cObject);
  rb_define_alloc_func(rb_cMyCalc, calc_alloc);
  rb_define_method(rb_cMyCalc, "initialize", calc_init, 0);
  rb_define_method(rb_cMyCalc, "merge", calc_merge, 2);
  rb_define_method(rb_cMyCalc, "merge2", calc_merge, 2);
  rb_define_method(rb_cMyCalc, "merge3", calc_merge, 2);
}


# test.rb
# Benchmark driver: builds two 100000-entry hashes with disjoint keys and,
# depending on the first command-line argument ("1", "2" or "3"), merges them
# with the corresponding Calculator method and prints the resulting key count.
# With any other argument (or none) only the setup cost is measured.
require "hello"

entry_count = 100000

h1 = {}
h2 = {}

# h1 is keyed by integers, h2 by strings, so the key sets never overlap.
(1..entry_count).each do |x|
  h1[x] = x + 1
end
(1..entry_count).each do |x|
  h2["#{x}-12"] = x + 1
end

c = Hello::Calculator.new

case ARGV[0]
when "1" then puts c.merge(h1, h2).keys.length
when "2" then puts c.merge2(h1, h2).keys.length
when "3" then puts c.merge3(h1, h2).keys.length
end

Ora i risultati del test:

$ time ruby test.rb

real    0m1.021s
user    0m0.940s
sys     0m0.080s
$ time ruby test.rb 1
200000

real    0m1.224s
user    0m1.148s
sys     0m0.076s
$ time ruby test.rb 2
200000

real    0m1.219s
user    0m1.132s
sys     0m0.084s
$ time ruby test.rb 3
200000

real    0m1.220s
user    0m1.128s
sys     0m0.092s

Quindi sembra che potremmo risparmiare al massimo ~0.004s su un'operazione da 0.2s.

Dato che probabilmente non c'è molto altro oltre all'impostazione dei valori, potrebbe non esserci molto spazio per ulteriori ottimizzazioni. Forse si potrebbe provare a modificare direttamente il sorgente di Ruby, ma a quel punto non si sta più sviluppando un'"estensione" bensì cambiando il linguaggio, quindi probabilmente non funzionerà.

Se l'unione di hash è qualcosa che devi fare molte volte nella parte C, allora probabilmente l'unico modo per ottimizzare le cose sarebbe usare strutture dati interne ed esportarle in hash Ruby solo nel passaggio finale.

P.S. Lo scheletro iniziale del codice è stato preso in prestito da questo ottimo tutorial.

Distribuito con licenza CC-BY-SA, con attribuzione

Non affiliato a StackOverflow