Porting continues.
author ilb@NIH.GOV <ilb@NIH.GOV@ba61647d-9d00-f842-95cd-605cb4296b96>
Thu, 19 Apr 2018 20:54:10 +0000 (20:54 +0000)
committer ilb@NIH.GOV <ilb@NIH.GOV@ba61647d-9d00-f842-95cd-605cb4296b96>
Thu, 19 Apr 2018 20:54:10 +0000 (20:54 +0000)
git-svn-id: https://citdcbmipav.cit.nih.gov/repos-pub/mipav/trunk@15454 ba61647d-9d00-f842-95cd-605cb4296b96

mipav/src/gov/nih/mipav/model/algorithms/StochasticForests.java

index 729e644..3bac000 100644 (file)
@@ -155,6 +155,17 @@ public class StochasticForests extends AlgorithmBase {
            // Permuted samples for corrected impurity importance\r
            protected Vector<Integer> permuted_sampleIDs = new Vector<Integer>();\r
            \r
+           /** Creates a Data object without assigning any field in the constructor body. */\r
+           public Data() {\r
+               \r
+           }\r
+           \r
+           /**\r
+            * Creates a Data object with the given variable names and dimensions.\r
+            * num_cols_no_snp starts out equal to num_cols.\r
+            *\r
+            * @param variable_names names stored as-is (the vector is not copied)\r
+            * @param num_rows number of rows\r
+            * @param num_cols number of columns\r
+            */\r
+           public Data(Vector<String>variable_names, int num_rows, int num_cols) {\r
+               this.num_cols = num_cols;\r
+               this.num_cols_no_snp = this.num_cols;\r
+               this.num_rows = num_rows;\r
+               this.variable_names = variable_names;\r
+           }\r
+           \r
            /** Drops the reference held in index_data. */\r
            public void dispose() {\r
                index_data = null;\r
            }\r
@@ -544,9 +555,25 @@ public class StochasticForests extends AlgorithmBase {
 \r
        } // private class Data\r
        \r
-       private class DoubleData extends Data {\r
+       private class DataDouble extends Data {\r
            private double data[] = null;\r
            \r
+           /** Creates an empty DataDouble; the data array stays null until it is reserved or assigned. */\r
+           public DataDouble() {\r
+               super();\r
+           }\r
+           \r
+           /**\r
+            * Wraps an existing array of values. The array reference is stored\r
+            * directly; no copy is made.\r
+            *\r
+            * @param data values to wrap\r
+            * @param variable_names names passed on to the Data constructor\r
+            * @param num_rows number of rows\r
+            * @param num_cols number of columns\r
+            */\r
+           public DataDouble(double data[], Vector<String> variable_names,\r
+                   int num_rows, int num_cols) {\r
+               super(variable_names, num_rows, num_cols);\r
+               this.data = data;\r
+           }\r
+           \r
+           /** Drops the data array unless the externalData flag is set. */\r
+           public void dispose() {\r
+               if (externalData) {\r
+                   return;\r
+               }\r
+               data = null;\r
+           }\r
+           \r
            /** Allocates a fresh array of num_cols * num_rows doubles, replacing any previous array. */\r
            public void reserveMemory() {\r
                data = new double[num_cols * num_rows];\r
            }\r
@@ -572,8 +599,222 @@ public class StochasticForests extends AlgorithmBase {
                }\r
              }\r
 \r
+       } // private class DataDouble extends Data\r
+       \r
+       /**\r
+        * Data container that stores its values as floats.\r
+        * Element (row, col) lives at index col * num_rows + row.\r
+        */\r
+       private class DataFloat extends Data {\r
+           private float data[] = null;\r
+           \r
+           /** Creates an empty container; no storage is allocated. */\r
+           public DataFloat() {\r
+               super();\r
+           }\r
+           \r
+           /**\r
+            * Allocates num_cols * num_rows floats and fills them with the values\r
+            * of data_double narrowed to float, using the same index layout.\r
+            */\r
+           public DataFloat(double data_double[], Vector<String> variable_names, int num_rows,\r
+                   int num_cols) {\r
+               super(variable_names, num_rows, num_cols);\r
+               reserveMemory();\r
+               for (int col = 0; col < num_cols; col++) {\r
+                   final int base = col * num_rows;\r
+                   for (int row = 0; row < num_rows; row++) {\r
+                       data[base + row] = (float) data_double[base + row];\r
+                   }\r
+               }\r
+           }\r
+           \r
+           /** Drops the float array unless the externalData flag is set. */\r
+           public void dispose() {\r
+               if (externalData) {\r
+                   return;\r
+               }\r
+               data = null;\r
+           }\r
+           \r
+           /** Allocates a fresh array of num_cols * num_rows floats. */\r
+           public void reserveMemory() {\r
+               final int cells = num_cols * num_rows;\r
+               data = new float[cells];\r
+           }\r
+           \r
+           /** Stores value, narrowed to float, at (row, col); the error array is not touched. */\r
+           public void set(int col, int row, double value, boolean error[]) {\r
+               final float narrowed = (float) value;\r
+               data[col * num_rows + row] = narrowed;\r
+           }\r
+           \r
+           /**\r
+            * Returns the value at (row, col). Column ids at or beyond num_cols are\r
+            * mapped through getUnpermutedVarID / getPermutedSampleID first; columns\r
+            * at or beyond num_cols_no_snp are read from the packed snp_data array.\r
+            */\r
+           public double get(int row, int col) {\r
+               int r = row;\r
+               int c = col;\r
+               // Use permuted data for corrected impurity importance\r
+               if (c >= num_cols) {\r
+                   c = getUnpermutedVarID(c);\r
+                   r = getPermutedSampleID(r);\r
+               }\r
+               \r
+               if (c >= num_cols_no_snp) {\r
+                   // Get data out of snp storage (four entries per snp_data element).\r
+                   // -1 because of GenABEL coding.\r
+                   final int idx = (c - num_cols_no_snp) * num_rows_rounded + r;\r
+                   final int slot = idx % 4;\r
+                   return ((snp_data[idx / 4] & mask[slot]) >> offset[slot]) - 1;\r
+               }\r
+               return data[c * num_rows + r];\r
+           }\r
+           \r
+       } // private class DataFloat extends Data\r
+       \r
+       /**\r
+        * Data container that stores its values as chars.\r
+        * Element (row, col) lives at index col * num_rows + row. Values that are\r
+        * outside the char range or not whole numbers are still stored (after a\r
+        * cast to char) but are reported through the caller-supplied error flag.\r
+        */\r
+       private class DataChar extends Data {\r
+           private char data[] = null;\r
+           \r
+           /** Creates an empty container; no storage is allocated. */\r
+           public DataChar() {\r
+               super();\r
+           }\r
+           \r
+           /**\r
+            * Allocates num_cols * num_rows chars and fills them from data_double.\r
+            *\r
+            * @param data_double source values, same index layout as this container\r
+            * @param variable_names names passed on to the Data constructor\r
+            * @param num_rows number of rows\r
+            * @param num_cols number of columns\r
+            * @param error error[0] is set to true if any value cannot be\r
+            *        represented exactly as a char; it is never reset to false here\r
+            */\r
+           public DataChar(double data_double[], Vector<String> variable_names, int num_rows,\r
+                   int num_cols, boolean error[]) {\r
+               super(variable_names, num_rows, num_cols);\r
+               reserveMemory();\r
+               \r
+               // Save data and report errors\r
+               for (int i = 0; i < num_cols; i++) {\r
+                   for (int j = 0; j < num_rows; j++) {\r
+                       double value = data_double[i * num_rows + j];\r
+                       flagIfNotChar(value, error);\r
+                       data[i * num_rows + j] = (char) value;\r
+                   }\r
+               }\r
+           }\r
+           \r
+           /**\r
+            * Sets error[0] when value is outside [Character.MIN_VALUE,\r
+            * Character.MAX_VALUE] or is not a whole number. Shared by the\r
+            * constructor and set() so both apply the same rule.\r
+            */\r
+           private void flagIfNotChar(double value, boolean error[]) {\r
+               if ((value > Character.MAX_VALUE) || (value < Character.MIN_VALUE)) {\r
+                   error[0] = true;\r
+               }\r
+               // floor != ceil is also true for NaN, so NaN is reported as well\r
+               if (Math.floor(value) != Math.ceil(value)) {\r
+                   error[0] = true;\r
+               }\r
+           }\r
+           \r
+           /** Drops the char array unless the externalData flag is set. */\r
+           public void dispose() {\r
+               if (!externalData) {\r
+                   data = null;\r
+               }\r
+           }\r
+           \r
+           /** Allocates a fresh array of num_cols * num_rows chars. */\r
+           public void reserveMemory() {\r
+               data = new char[num_cols * num_rows];\r
+           }\r
+           \r
+           /**\r
+            * Stores value, cast to char, at (row, col) and sets error[0] if the\r
+            * value cannot be represented exactly as a char.\r
+            */\r
+           public void set(int col, int row, double value, boolean error[]) {\r
+               flagIfNotChar(value, error);\r
+               data[col * num_rows + row] = (char) value;\r
+           }\r
+           \r
+           /**\r
+            * Returns the value at (row, col). Column ids at or beyond num_cols are\r
+            * mapped through getUnpermutedVarID / getPermutedSampleID first; columns\r
+            * at or beyond num_cols_no_snp are read from the packed snp_data array.\r
+            */\r
+           public double get(int row, int col) {\r
+               // Use permuted data for corrected impurity importance\r
+               if (col >= num_cols) {\r
+                   col = getUnpermutedVarID(col);\r
+                   row = getPermutedSampleID(row);\r
+               }\r
+               \r
+               if (col < num_cols_no_snp) {\r
+                   return data[col * num_rows + row];\r
+               } else {\r
+                   // Get data out of snp storage. -1 because of GenABEL coding.\r
+                   int idx = (col - num_cols_no_snp) * num_rows_rounded + row;\r
+                   double result = (((snp_data[idx / 4] & mask[idx % 4]) >> offset[idx % 4]) - 1);\r
+                   return result;\r
+               }\r
+           }\r
 \r
-       } // private class DoubleData extends Data\r
+       } // private class DataChar extends Data\r
+       \r
+       /**\r
+        * Minimal coordinate-list sparse matrix. Stored entries are kept in sm,\r
+        * which has 3 rows (row index, column index, value) and\r
+        * num_nonzero_values columns. Entries that were never inserted read as 0.\r
+        * Lookups and inserts scan all stored entries linearly.\r
+        */\r
+       private class SparseMatrix {\r
+           int num_rows;\r
+           int num_cols;\r
+           int num_nonzero_values;\r
+           // sparse matrix with 3 rows and num_nonzero_values columns, row, column, and value rows\r
+           // Allocated empty up front so that the first insert() has something to grow from.\r
+           double sm[][] = new double[3][0];\r
+           \r
+           /** Creates an empty matrix with zero dimensions. */\r
+           public SparseMatrix() {\r
+               super();\r
+           }\r
+           \r
+           /** Creates an empty matrix with the given dimensions; the dimensions are recorded but not enforced. */\r
+           public SparseMatrix(int num_rows, int num_cols) {\r
+               super();\r
+               this.num_rows = num_rows;\r
+               this.num_cols = num_cols;\r
+           }\r
+           \r
+           /** Returns the storage position of entry (row, col), or -1 if it is not stored. */\r
+           private int indexOf(int row, int col) {\r
+               for (int i = 0; i < num_nonzero_values; i++) {\r
+                   if ((sm[0][i] == row) && (sm[1][i] == col)) {\r
+                       return i;\r
+                   }\r
+               }\r
+               return -1;\r
+           }\r
+           \r
+           /**\r
+            * Returns the value at (row, col).\r
+            *\r
+            * @return the stored value, or 0 when no entry exists for that position\r
+            *         (an unstored entry of a sparse matrix is an implicit zero)\r
+            */\r
+           public double coeff(int row, int col) {\r
+               int idx = indexOf(row, col);\r
+               return (idx >= 0) ? sm[2][idx] : 0.0;\r
+           }\r
+           \r
+           /**\r
+            * Sets the value at (row, col), overwriting an existing entry or\r
+            * appending a new one.\r
+            */\r
+           // coeffRef assumes the row and column value already exist.\r
+           // If they do not, use insert\r
+           public void insert(int row, int col, double value) {\r
+               int idx = indexOf(row, col);\r
+               if (idx >= 0) {\r
+                   sm[2][idx] = value;\r
+                   return;\r
+               }\r
+               \r
+               // Grow each of the three rows by one slot and append the new entry.\r
+               int old_count = num_nonzero_values;\r
+               double grown[][] = new double[3][old_count + 1];\r
+               if (sm != null) {\r
+                   for (int r = 0; r < 3; r++) {\r
+                       System.arraycopy(sm[r], 0, grown[r], 0, old_count);\r
+                   }\r
+               }\r
+               grown[0][old_count] = row;\r
+               grown[1][old_count] = col;\r
+               grown[2][old_count] = value;\r
+               sm = grown;\r
+               num_nonzero_values = old_count + 1;\r
+           }\r
+       }\r
+       \r
+       /** Data container whose values are held in a SparseMatrix. */\r
+       private class DataSparse extends Data {\r
+           private SparseMatrix data = null;\r
+           \r
+           /** Creates an empty container; no matrix is attached. */\r
+           public DataSparse() {\r
+               super();\r
+           }\r
+           \r
+           /** Wraps an existing matrix; the reference is stored directly, without copying. */\r
+           public DataSparse(SparseMatrix data, Vector<String> variable_names,\r
+                   int num_rows, int num_cols) {\r
+               super(variable_names, num_rows, num_cols);\r
+               this.data = data;\r
+           }\r
+           \r
+           /** Drops the matrix reference unless the externalData flag is set. */\r
+           public void dispose() {\r
+               if (externalData) {\r
+                   return;\r
+               }\r
+               data = null;\r
+           }\r
+           \r
+           /** Returns whatever the matrix reports for (row, col). */\r
+           public double get(int row, int col) {\r
+               final double stored = data.coeff(row, col);\r
+               return stored;\r
+           }\r
+           \r
+           /** Replaces the matrix with a new, empty one of size num_rows x num_cols. */\r
+           public void reserveMemory() {\r
+               data = new SparseMatrix(num_rows, num_cols);\r
+           }\r
+           \r
+           /** Stores value at (row, col) through the matrix's insert(); the error array is not touched. */\r
+           public void set(int col, int row, double value, boolean error[]) {\r
+               // insert() is used rather than coeffRef(row, col, value) because\r
+               // coeffRef assumes that the (row, col) position already exists.\r
+               data.insert(row, col, value);\r
+           }\r
+       } // private class DataSparse\r
        \r
        private int roundToNextMultiple(int value, int multiple) {\r
                if (multiple == 0) {\r
@@ -599,8 +840,154 @@ public class StochasticForests extends AlgorithmBase {
                v.set(i,temp);\r
            }\r
        }\r
+       \r
+       /**\r
+        * Splits the inclusive range [start, end] into num_parts contiguous parts of\r
+        * near-equal length and appends the part boundaries to result. Part k covers\r
+        * [boundary k, boundary k+1), so the last appended value is end + 1. The\r
+        * longer parts come first.\r
+        *\r
+        * @param result vector the boundaries are appended to\r
+        * @param start first element of the range\r
+        * @param end last element of the range (inclusive)\r
+        * @param num_parts number of parts; expected to be at least 1 (0 leads to a\r
+        *        division by zero when the range is not empty)\r
+        */\r
+       private void equalSplit(Vector<Integer> result, int start, int end, int num_parts) {\r
+           // Only pre-allocate room. setSize() would insert null elements in front\r
+           // of the boundaries that are appended below.\r
+           result.ensureCapacity(result.size() + num_parts + 1);\r
+           \r
+           // Return range if only 1 part\r
+           if (num_parts == 1) {\r
+               result.add(start);\r
+               result.add(end + 1);\r
+               return;\r
+           }\r
+           \r
+           // Return vector from start to end+1 if more parts than elements\r
+           if (num_parts > end - start + 1) {\r
+               for (int i = start; i <= end + 1; i++) {\r
+                   result.add(i);\r
+               }\r
+               return;\r
+           }\r
+           \r
+           int length = (end - start + 1);\r
+           int part_length_short = length / num_parts;\r
+           int part_length_long = (int) Math.ceil(length / ((double) num_parts));\r
+           // Number of parts that get the longer length\r
+           int cut_pos = length % num_parts;\r
+           int first_short = start + cut_pos * part_length_long;\r
+           \r
+           // Add long ranges\r
+           for (int i = start; i < first_short; i = i + part_length_long) {\r
+               result.add(i);\r
+           }\r
+           \r
+           // Add short ranges\r
+           for (int i = first_short; i <= end + 1; i = i + part_length_short) {\r
+               result.add(i);\r
+           }\r
+           \r
+       } // private void equalSplit\r
+       \r
+       /**\r
+        * Reads the first line of a text file and appends every whitespace-separated\r
+        * number on it to result. Remaining lines are ignored. Problems opening,\r
+        * reading or closing the file are reported through MipavUtil.displayError\r
+        * and leave result unchanged.\r
+        *\r
+        * @param result vector the parsed values are appended to\r
+        * @param filename path of the file to read\r
+        * @throws NumberFormatException if a token on the line is not a valid double\r
+        */\r
+       void loadDoubleVectorFromFile(Vector<Double> result, String filename) { // #nocov start\r
+           // Open input file\r
+           BufferedReader input_file;\r
+           try {\r
+               input_file = new BufferedReader(new FileReader(new File(filename)));\r
+           }\r
+           catch (FileNotFoundException e) {\r
+               MipavUtil.displayError("Could not find file " + filename);\r
+               return;\r
+           }\r
+           \r
+           // Read the first line, ignore the rest. The reader is closed in the\r
+           // finally block so that a failed read does not leak the file handle.\r
+           String line = null;\r
+           boolean failed = false;\r
+           try {\r
+               line = input_file.readLine();\r
+           }\r
+           catch (IOException e) {\r
+               MipavUtil.displayError("IO exception on readLine from input_file");\r
+               failed = true;\r
+           }\r
+           finally {\r
+               try {\r
+                   input_file.close();\r
+               }\r
+               catch (IOException e) {\r
+                   MipavUtil.displayError("IO exception on close from input_file");\r
+                   failed = true;\r
+               }\r
+           }\r
+           if (failed || (line == null)) {\r
+               return;\r
+           }\r
+           \r
+           // Split on runs of whitespace so that repeated blanks, tabs and\r
+           // leading/trailing blanks do not produce empty tokens.\r
+           String trimmed = line.trim();\r
+           if (trimmed.isEmpty()) {\r
+               return;\r
+           }\r
+           String tokens[] = trimmed.split("\\s+");\r
+           for (int i = 0; i < tokens.length; i++) {\r
+               result.add(Double.valueOf(tokens[i]));\r
+           }\r
+       } // #nocov end\r
+       \r
+       /**\r
+        * Draws num_samples values without replacement, delegating to the simple\r
+        * rejection-based variant when num_samples is below max / 10 (integer\r
+        * division) and to the Fisher-Yates variant otherwise.\r
+        */\r
+       private void drawWithoutReplacementSkip(Vector<Integer> result, int max,\r
+               Vector<Integer> skip, int num_samples) {\r
+           final boolean few_samples = num_samples < max / 10;\r
+           if (!few_samples) {\r
+               // Fisher-Yates is used here; the Knuth variant is not called.\r
+               drawWithoutReplacementFisherYates(result, max, skip, num_samples);\r
+               return;\r
+           }\r
+           drawWithoutReplacementSimple(result, max, skip, num_samples);\r
+       }\r
 \r
        \r
+       /**\r
+        * Appends num_samples distinct values from [0, max) to result, never\r
+        * choosing a value listed in skip. Each candidate is drawn at random and\r
+        * redrawn if it was already taken.\r
+        *\r
+        * NOTE(review): the index shifting appears to assume that skip is sorted in\r
+        * ascending order - confirm with callers. num_samples must not exceed\r
+        * max - skip.size(), otherwise the redraw loop cannot terminate.\r
+        *\r
+        * @param result vector the drawn values are appended to\r
+        * @param max exclusive upper bound of the values\r
+        * @param skip values that must not be drawn\r
+        * @param num_samples number of values to draw\r
+        */\r
+       private void drawWithoutReplacementSimple(Vector<Integer> result, int max,\r
+               Vector<Integer> skip, int num_samples) {\r
+           // Only pre-allocate room. setSize() would insert null elements in front\r
+           // of the drawn values.\r
+           result.ensureCapacity(result.size() + num_samples);\r
+           \r
+           // Set all to not selected\r
+           boolean drawn[] = new boolean[Math.max(max, 0)];\r
+           \r
+           Random random = new Random();\r
+           final int num_candidates = max - skip.size();\r
+           for (int i = 0; i < num_samples; ++i) {\r
+               int draw;\r
+               do {\r
+                   draw = random.nextInt(num_candidates);\r
+                   // Shift the draw past every skipped value at or below it\r
+                   for (int j = 0; j < skip.size(); j++) {\r
+                       if (draw >= skip.get(j)) {\r
+                           ++draw;\r
+                       }\r
+                   }\r
+               } while (drawn[draw]);\r
+               drawn[draw] = true;\r
+               result.add(draw);\r
+           }\r
+       }\r
+\r
+       /**\r
+        * Replaces the contents of result with num_samples distinct values from\r
+        * [0, max), never choosing a value listed in skip, using a partial\r
+        * Fisher-Yates shuffle.\r
+        *\r
+        * NOTE(review): skipped values are removed by position from the back of\r
+        * skip, which appears to assume skip is sorted ascending without\r
+        * duplicates - confirm with callers. num_samples is expected not to exceed\r
+        * max - skip.size().\r
+        *\r
+        * @param result vector that receives the drawn values; previous contents are discarded\r
+        * @param max exclusive upper bound of the values\r
+        * @param skip values that must not be drawn\r
+        * @param num_samples number of values to draw\r
+        */\r
+       private void drawWithoutReplacementFisherYates(Vector<Integer> result,\r
+               int max, Vector<Integer> skip, int num_samples) {\r
+           // Create indices 0 .. max-1. The vector is cleared first: calling\r
+           // setSize(max) and then add() would leave max null elements in front of\r
+           // the indices.\r
+           result.clear();\r
+           result.ensureCapacity(max);\r
+           for (int i = 0; i < max; i++) {\r
+               result.add(i);\r
+           }\r
+           \r
+           // Skip indices, last one first so earlier positions stay valid\r
+           for (int i = skip.size() - 1; i >= 0; --i) {\r
+               result.removeElementAt(skip.get(i).intValue());\r
+           }\r
+           \r
+           // Draw without replacement using Fisher Yates algorithm\r
+           Random random = new Random();\r
+           final int num_candidates = max - skip.size();\r
+           for (int i = 0; i < num_samples; ++i) {\r
+               // Truncate rather than round: nextDouble() is below 1, so j stays in\r
+               // [i, num_candidates - 1] and every position is equally likely.\r
+               int j = i + (int) (random.nextDouble() * (num_candidates - i));\r
+               Integer swapped = result.get(i);\r
+               result.set(i, result.get(j));\r
+               result.set(j, swapped);\r
+           }\r
+           \r
+           result.setSize(num_samples);\r
+       }\r
+       \r
+       \r
        /** Algorithm entry point; the body is still empty in this revision. */\r
        public void runAlgorithm() {\r
                \r
        }\r