<?xml version="1.0" encoding="UTF-8"?>
<s:scufl xmlns:s="http://org.embl.ebi.escience/xscufl/0.1alpha" version="0.2" log="0">
  <s:workflowdescription lsid="urn:lsid:www.mygrid.org.uk:operation:03POCSJ1HQ5" author="Peter Li" title="Using RServe in taverna - generating a histogram">This workflow reads a CSV file containing microarray data into R, calculates gene level frequencies and plots these as a histogram.</s:workflowdescription>
  <s:processor name="cleanMaxDOutput">
    <!-- Beanshell step that turns the raw text on port "in" into CSV on port "out".
         The script text below is data: keep it flush-left and keep XML-escaping
         any less-than or ampersand characters used in the code. -->
    <s:beanshell>
      <s:scriptvalue>/*
 * Clean up the output of MaxdBrowse and convert it to CSV.
 *
 * Input : in  - raw text returned by the maxdBrowse query
 * Output: out - the table from the "Feature_ID" header row up to the first
 *               "#" that follows it, comma-separated, one row per line,
 *               with trailing commas removed
 */

StringBuffer sb = new StringBuffer(in);

// Drop everything that precedes the header row.
int headerStart = sb.indexOf("Feature_ID");
if(headerStart == -1)
{
  // No header means there is no table to extract. Fail with a clear message
  // rather than the StringIndexOutOfBoundsException that delete(0, -1) raises.
  throw new RuntimeException("cleanMaxDOutput: 'Feature_ID' header not found in input");
}
sb.delete(0, headerStart);

// Drop the trailer that starts at the first "#" after the header.
// When there is no trailer, keep the text through to the end.
int trailerStart = sb.indexOf("#");
if(trailerStart != -1)
{
  sb.delete(trailerStart, sb.length());
}

// Convert into csv
String maxd = sb.toString();
maxd = maxd.trim();

// Rename the two header fields so they contain neither "_" nor a space;
// the R step reads the second one as maxd$NormalisedData.
maxd = maxd.replaceFirst("Feature_ID", "FeatureID");
maxd = maxd.replaceFirst("Normalised data", "NormalisedData");
maxd = maxd.replaceAll("\t", ",");

// Remove end commas
String[] maxdLines = maxd.split("\n");
StringBuffer newMaxd = new StringBuffer();
for(int x = 0; x &lt; maxdLines.length; x ++)
{
  String line = maxdLines[x];

  // Blank lines carry no data and are left out of the result.
  if(line.length() == 0)
  {
    continue;
  }

  if(line.endsWith(","))
  {
    line = line.substring(0, line.length() - 1);
  }

  // Every non-blank row is kept, whether or not it had a trailing comma.
  newMaxd.append(line + "\n");
}

String out = newMaxd.toString();</s:scriptvalue>
      <s:beanshellinputlist>
        <s:beanshellinput s:syntactictype="'text/plain'">in</s:beanshellinput>
      </s:beanshellinputlist>
      <s:beanshelloutputlist>
        <s:beanshelloutput s:syntactictype="'text/plain'">out</s:beanshelloutput>
      </s:beanshelloutputlist>
      <s:dependencies s:classloader="iteration" />
    </s:beanshell>
  </s:processor>
  <!-- Constant holding the measurement name; a link elsewhere in this file
       feeds its "value" port into queryMaxD:MeasurementNames.
       NOTE(review): boring="true" presumably marks this as a trivial processor
       for display purposes; confirm against the Taverna documentation. -->
  <s:processor name="measurementName" boring="true">
    <s:stringconstant>JC_C-0.07-1_Measurement</s:stringconstant>
  </s:processor>
  <s:processor name="generateHistogram">
    <!-- R script executed through RServe (port 6311 is the Rserve default).
         The script uses both ports as file names: "csv" is read with read.csv()
         and "histogram" is the target of the png() device.
         Steps: open the PNG device, load the CSV with its header row, plot a
         histogram of the NormalisedData column, then close the device so the
         image is written.
         hist() is deliberately called without a "method" argument: that
         parameter belongs to stripchart(), and hist() would only pass it on as
         an unknown graphical parameter.
         NOTE(review): hostname, username and password are empty; presumably the
         RShell processor falls back to its defaults; confirm.
         NOTE(review): the attribute is spelled "symanticType" throughout this
         file; presumably the parser expects that spelling; confirm before
         renaming it. -->
    <s:rshell s:hostname="" s:port="6311" s:username="" s:password="" s:keepSessionAlive="false">
      png(histogram);
maxd &lt;- read.csv(file=csv,header=TRUE,sep=",")
hist(maxd$NormalisedData, main='Gene expn level frequencies of yeast grown in Carbon-deficient media', xlab='Gene expression level')
dev.off()
      <s:rshellInputPortList>
        <s:rshellInputPort s:syntacticType="'text/plain'" s:symanticType="TEXT_FILE">csv</s:rshellInputPort>
      </s:rshellInputPortList>
      <s:rshellOutputPortList>
        <s:rshellOutputPort s:syntacticType="'image/png'" s:symanticType="PNG_FILE">histogram</s:rshellOutputPort>
      </s:rshellOutputPortList>
    </s:rshell>
  </s:processor>
  <!-- Web-service step: invokes the "query" operation of the maxdBrowse
       MeasurementDataTabTextFASTEST WSDL. MeasurementNames is supplied by a link
       from the measurementName constant; the other inputs use the defaults
       declared below (all column types, comma as the list splitter).
       NOTE(review): the profileANDppass default "ProfilePasswordRequired" looks
       like a placeholder; presumably real credentials must be supplied before
       the service will answer; confirm. -->
  <s:processor name="queryMaxD">
    <s:description>Will take a list of Measurement names and a list of Column Types and produce a table. All values are coloured in relation to the a reference. In the list of Measurement names, the first Measurement listed is taken to be the reference. Formatted as text. MeasurementDataTabTextFAST is slower because it also exports Reporter and Gene information.</s:description>
    <s:defaults>
      <s:default name="ColumnTypes">*</s:default>
      <s:default name="listSplitter">,</s:default>
      <s:default name="profileANDppass">ProfilePasswordRequired</s:default>
    </s:defaults>
    <s:arbitrarywsdl>
      <s:wsdl>http://dbkgroup.org/software/maxd/maxdBrowse/sequences/MeasurementDataTabTextFASTEST/wsdl.xml</s:wsdl>
      <s:operation>query</s:operation>
    </s:arbitrarywsdl>
  </s:processor>
  <!-- Data flow, written as processor:port pairs; a sink without a colon is a
       workflow output.
       measurementName.value feeds queryMaxD.MeasurementNames;
       queryMaxD.return feeds cleanMaxDOutput.in and is also exposed as "out";
       cleanMaxDOutput.out feeds generateHistogram.csv and is also exposed as "csv";
       generateHistogram.histogram is exposed as "histogram". -->
  <s:link source="cleanMaxDOutput:out" sink="csv" />
  <s:link source="cleanMaxDOutput:out" sink="generateHistogram:csv" />
  <s:link source="generateHistogram:histogram" sink="histogram" />
  <s:link source="measurementName:value" sink="queryMaxD:MeasurementNames" />
  <s:link source="queryMaxD:return" sink="cleanMaxDOutput:in" />
  <s:link source="queryMaxD:return" sink="out" />
  <!-- Workflow outputs: cleaned CSV, rendered PNG, and the raw service response. -->
  <s:sink name="csv" />
  <s:sink name="histogram" />
  <s:sink name="out" />
</s:scufl>

