Formatting vectors when importing into RapidMiner

Question

The "Pivot" operator will do this job for you. Set docid as the group attribute and word as the index attribute, and you will get something similar to what you want. To get exactly what you want, you will have to remove the id attribute before pivoting, and then rename the resulting attributes and replace the missing values afterwards.

I have built a small example for you. Ignore the "Read CSV" operator and replace it with your own "Read database" operator.

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      Example RapidMiner process (version 5.3.009).
      Operator chain, as wired by the connect elements at the bottom:
        Read CSV, then Remove id, then Pivot, then Remove prefix,
        then Replace Missing Values, then the process port "result 1".
    -->
    <process version="5.3.009">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.3.009" expanded="true" name="Process">
        <process expanded="true">
          <!-- Stand-in data source for the example; the accompanying text says to
               replace it with your own "Read database" operator. -->
          <operator activated="true" class="read_csv" compatibility="5.3.009" expanded="true" height="60" name="Read CSV" width="90" x="45" y="30">
            <parameter key="csv_file" value="~/temp/stackoverflow/vectors.csv"/>
            <parameter key="trim_lines" value="true"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <parameter key="encoding" value="UTF-8"/>
            <!-- One entry per column, keyed by column index. The value looks like
                 name.selected.type.role; confirm the field order against the
                 Read CSV documentation.
                 NOTE(review): "timestamp" is declared binominal here; confirm that
                 this is the intended type for your data. -->
            <list key="data_set_meta_data_information">
              <parameter key="0" value="id.true.integer.attribute"/>
              <parameter key="1" value="docid.true.integer.attribute"/>
              <parameter key="2" value="word.true.polynominal.attribute"/>
              <parameter key="3" value="weight.true.real.attribute"/>
              <parameter key="4" value="class/label.true.binominal.attribute"/>
              <parameter key="5" value="timestamp.true.binominal.attribute"/>
            </list>
          </operator>
          <!-- Selects the single attribute "id" with invert_selection set to true;
               as the operator name says, the intent is to drop "id" and keep the rest.
               NOTE(review): regular_expression is also set although the filter type
               is "single"; presumably it is unused; confirm. -->
          <operator activated="true" class="select_attributes" compatibility="5.3.009" expanded="true" height="76" name="Remove id" width="90" x="179" y="30">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="id"/>
            <parameter key="regular_expression" value="id_.*"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Pivots with docid as the group attribute and word as the index attribute.
               NOTE(review): class/label and timestamp are still present at this point;
               check how Pivot treats them in the output. -->
          <operator activated="true" class="pivot" compatibility="5.3.009" expanded="true" height="76" name="Pivot" width="90" x="313" y="30">
            <parameter key="group_attribute" value="docid"/>
            <parameter key="index_attribute" value="word"/>
          </operator>
          <!-- Renames attributes by replacing "weight_"; no replacement text is set
               here, so presumably the prefix is simply removed; confirm the default. -->
          <operator activated="true" class="rename_by_replacing" compatibility="5.3.009" expanded="true" height="76" name="Remove prefix" width="90" x="447" y="30">
            <parameter key="replace_what" value="weight_"/>
          </operator>
          <!-- Restricted to attributes of value type numeric; the default replacement
               is "zero" and no per-column overrides are listed. -->
          <operator activated="true" class="replace_missing_values" compatibility="5.3.009" expanded="true" height="94" name="Replace Missing Values" width="90" x="581" y="30">
            <parameter key="attribute_filter_type" value="value_type"/>
            <parameter key="value_type" value="numeric"/>
            <parameter key="default" value="zero"/>
            <list key="columns"/>
          </operator>
          <!-- Wiring: each operator's output feeds the next operator's
               "example set input"; the last one feeds the process port "result 1". -->
          <connect from_op="Read CSV" from_port="output" to_op="Remove id" to_port="example set input"/>
          <connect from_op="Remove id" from_port="example set output" to_op="Pivot" to_port="example set input"/>
          <connect from_op="Pivot" from_port="example set output" to_op="Remove prefix" to_port="example set input"/>
          <connect from_op="Remove prefix" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
          <connect from_op="Replace Missing Values" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>