RapidMiner syntax confusion

34 Views Asked by At

"Lowest" is assigned when the growth rate is equal to 0. "Highest" is assigned when the growth rate is equal to the maximum growth rate in the dataset. "Intermediate" has no condition of its own; it is the default label for all other cases.

The expression is:

if([New member growth rate (%)] == 0, "Lowest", if([New member growth rate (%)] == max([New member growth rate (%)]), "Highest", "Intermediate"))

The rows that are supposed to be labelled "Intermediate" all show the result "Highest" instead.

What is the error in the syntax?

1

There is 1 best solution below

0
Christian König On

While your expression is syntactically correct, you are misinterpreting the result of the max() function. It returns the maximum value of all its arguments - in your case you are giving it only the current row's value of the attribute [New member growth rate (%)]. Since this value is always equal to itself, the result of the inner if() is always "Highest" (for every row whose value is not 0).

Generate Attributes works row-wise, so if you mean to get the maximum value for all different values of one attribute, you have to calculate that beforehand as an additional attribute, so you are able to reference that in your Generate Attributes expression: example workflow

Have a look at this example process to see the actual setup including parameters and expressions:

<?xml version="1.0" encoding="UTF-8"?><process version="10.1.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="10.1.003" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Builds the demo dataset for this example: loads the Iris sample data, renames
           attribute a1 to "New member growth rate (%)", keeps only id and that attribute,
           then appends a relative sample (ratio 0.1) whose growth rate has been overwritten
           with 0.0, so the data also contains rows with a growth rate of exactly 0. -->
      <operator activated="true" class="subprocess" compatibility="10.1.003" expanded="true" height="82" name="Subprocess" width="90" x="45" y="34">
        <process expanded="true">
          <operator activated="true" class="retrieve" compatibility="10.1.003" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34">
            <parameter key="repository_entry" value="//Samples/data/Iris"/>
          </operator>
          <operator activated="true" class="blending:rename" compatibility="10.1.003" expanded="true" height="82" name="Rename" width="90" x="179" y="34">
            <list key="rename attributes">
              <parameter key="a1" value="New member growth rate (%)"/>
            </list>
            <parameter key="from_attribute" value=""/>
            <parameter key="to_attribute" value=""/>
          </operator>
          <operator activated="true" class="blending:select_attributes" compatibility="10.1.003" expanded="true" height="82" name="Select Attributes" width="90" x="313" y="34">
            <parameter key="type" value="include attributes"/>
            <parameter key="attribute_filter_type" value="a subset"/>
            <parameter key="select_attribute" value=""/>
            <parameter key="select_subset" value="id␞New member growth rate (%)"/>
            <parameter key="also_apply_to_special_attributes_(id,_label..)" value="true"/>
          </operator>
          <operator activated="true" class="sample" compatibility="10.1.003" expanded="true" height="82" name="Sample" width="90" x="447" y="34">
            <parameter key="sample" value="relative"/>
            <parameter key="balance_data" value="false"/>
            <parameter key="sample_size" value="100"/>
            <parameter key="sample_ratio" value="0.1"/>
            <parameter key="sample_probability" value="0.1"/>
            <list key="sample_size_per_class"/>
            <list key="sample_ratio_per_class"/>
            <list key="sample_probability_per_class"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
          </operator>
          <!-- Overwrites the growth rate of the sampled rows with the constant 0.0. -->
          <operator activated="true" class="blending:generate_columns" compatibility="10.1.003" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="581" y="136">
            <list key="function_descriptions">
              <parameter key="New member growth rate (%)" value="0.0"/>
            </list>
            <parameter key="keep_all_columns" value="true"/>
          </operator>
          <!-- Merges the "original" output of Sample with the zeroed sample rows (see the connect elements below). -->
          <operator activated="true" class="append" compatibility="10.1.003" expanded="true" height="103" name="Append" width="90" x="715" y="34">
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
            <parameter key="merge_type" value="all"/>
          </operator>
          <connect from_op="Retrieve Iris" from_port="output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Sample" to_port="example set input"/>
          <connect from_op="Sample" from_port="example set output" to_op="Generate Attributes (2)" to_port="table input"/>
          <connect from_op="Sample" from_port="original" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Attributes (2)" from_port="table output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">create example dataset</description>
      </operator>
      <!-- Step 1 of the fix: aggregate "New member growth rate (%)" with the "maximum"
           function and no group_by attributes, so the result holds the overall maximum.
           Downstream operators reference the resulting attribute by the name
           maximum(New member growth rate (%)). -->
      <operator activated="true" class="aggregate" compatibility="10.1.003" expanded="true" height="82" name="Aggregate" width="90" x="179" y="34">
        <parameter key="use_default_aggregation" value="false"/>
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="default_aggregation_function" value="average"/>
        <list key="aggregation_attributes">
          <parameter key="New member growth rate (%)" value="maximum"/>
        </list>
        <parameter key="group_by_attributes" value=""/>
        <parameter key="count_all_combinations" value="false"/>
        <parameter key="only_distinct" value="false"/>
        <parameter key="ignore_missings" value="true"/>
        <description align="center" color="transparent" colored="false" width="126">Calculate a new aggregation, which contains the overall maximum value of the attribute</description>
      </operator>
      <!-- Step 2 of the fix: combine the aggregation result with the original example set
           so that every example carries the overall maximum as an extra attribute that a
           row-wise expression can reference. -->
      <operator activated="true" class="cartesian_product" compatibility="10.1.003" expanded="true" height="82" name="Cartesian" width="90" x="313" y="34">
        <parameter key="remove_double_attributes" value="true"/>
        <description align="center" color="transparent" colored="false" width="126">Add this maximum as a new &amp;quot;temporary&amp;quot; attribute to each example.</description>
      </operator>
      <!-- Step 3 of the fix: create the attribute "label" row by row.
           "Lowest" when the growth rate equals 0, "Highest" when it equals the joined
           attribute [maximum(New member growth rate (%))], otherwise "Intermediate".
           The comparison uses the precomputed maximum attribute instead of calling max()
           on the row's own value, which would always compare the value with itself. -->
      <operator activated="true" class="blending:generate_columns" compatibility="10.1.003" expanded="true" height="82" name="Generate Attributes" width="90" x="447" y="34">
        <list key="function_descriptions">
          <parameter key="label" value="if([New member growth rate (%)] == 0,&#10;  &quot;Lowest&quot;,&#10;  if([New member growth rate (%)] == [maximum(New member growth rate (%))],&#10;    &quot;Highest&quot;,&#10;    &quot;Intermediate&quot;&#10;  )&#10;)"/>
        </list>
        <parameter key="keep_all_columns" value="true"/>
        <description align="center" color="transparent" colored="false" width="126">change the expression to compare with the new attribute</description>
      </operator>
      <!-- Step 4 of the fix: exclude the single helper attribute
           maximum(New member growth rate (%)) once the label has been computed. -->
      <operator activated="true" class="blending:select_attributes" compatibility="10.1.003" expanded="true" height="82" name="Select Attributes (2)" width="90" x="581" y="34">
        <parameter key="type" value="exclude attributes"/>
        <parameter key="attribute_filter_type" value="one attribute"/>
        <parameter key="select_attribute" value="maximum(New member growth rate (%))"/>
        <parameter key="select_subset" value=""/>
        <parameter key="also_apply_to_special_attributes_(id,_label..)" value="false"/>
        <description align="center" color="transparent" colored="false" width="126">remove temporary attribute</description>
      </operator>
      <!-- Top-level wiring: Subprocess feeds Aggregate. The aggregation result goes to the
           left input of Cartesian and Aggregate's "original" output (presumably the
           unaggregated input data) to the right input. The join then passes through
           Generate Attributes and Select Attributes (2) to the process result. -->
      <connect from_op="Subprocess" from_port="out 1" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Cartesian" to_port="left"/>
      <connect from_op="Aggregate" from_port="original" to_op="Cartesian" to_port="right"/>
      <connect from_op="Cartesian" from_port="join" to_op="Generate Attributes" to_port="table input"/>
      <connect from_op="Generate Attributes" from_port="table output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>