Porting continues.
author ilb@NIH.GOV <ilb@NIH.GOV@ba61647d-9d00-f842-95cd-605cb4296b96>
Mon, 23 Apr 2018 22:15:34 +0000 (22:15 +0000)
committer ilb@NIH.GOV <ilb@NIH.GOV@ba61647d-9d00-f842-95cd-605cb4296b96>
Mon, 23 Apr 2018 22:15:34 +0000 (22:15 +0000)
git-svn-id: https://citdcbmipav.cit.nih.gov/repos-pub/mipav/trunk@15456 ba61647d-9d00-f842-95cd-605cb4296b96

mipav/src/gov/nih/mipav/model/algorithms/StochasticForests.java

index 3bac000..f887555 100644 (file)
@@ -1,8 +1,5 @@
 package gov.nih.mipav.model.algorithms;\r
 \r
-\r
-\r
-import gov.nih.mipav.model.structures.*;\r
 import gov.nih.mipav.view.*;\r
 import java.io.*;\r
 import java.util.*;\r
@@ -131,7 +128,9 @@ public class StochasticForests extends AlgorithmBase {
        \r
        public static int RAND_MAX = 32767;\r
        \r
-       private class Data {\r
+       public static double logprop;\r
+       \r
+       private abstract class Data {\r
                protected Vector<String> variable_names;\r
                protected int num_rows = 0;\r
                protected int num_rows_rounded = 0;\r
@@ -170,6 +169,8 @@ public class StochasticForests extends AlgorithmBase {
                index_data = null;\r
            }\r
            \r
+           public abstract double get(int row, int column);\r
+           \r
            public int getVariableID(String variable_name) {\r
                 for (i = 0; i < variable_names.size(); i++) {\r
                         if(variable_names.get(i).equals(variable_name)) {\r
@@ -398,18 +399,9 @@ public class StochasticForests extends AlgorithmBase {
                } // for (int col = 0; col < num_cols_no_snp; col++) {\r
            } // public void sort()\r
            \r
-           public void reserveMemory() {\r
-               \r
-           };\r
+           public abstract void reserveMemory();\r
            \r
-           public void set(int col, int row, double value, boolean error[]) {\r
-               \r
-           }\r
-           \r
-           \r
-           public double get(int row, int col) {\r
-               return 0.0;\r
-           }\r
+           public abstract void set(int col, int row, double value, boolean error[]);\r
            \r
            public int getUnpermutedVarID(int varID) {\r
                if (varID >= num_cols) {\r
@@ -553,7 +545,7 @@ public class StochasticForests extends AlgorithmBase {
            }\r
 \r
 \r
-       } // private class Data\r
+       }; // private class Data\r
        \r
        private class DataDouble extends Data {\r
            private double data[] = null;\r
@@ -987,7 +979,548 @@ public class StochasticForests extends AlgorithmBase {
          result.setSize(num_samples);\r
        }\r
        \r
+       /**\r
+        * Draws num_samples distinct positions at random, each position j being picked with\r
+        * probability proportional to weights.get(j), and appends indices.get(j) for every\r
+        * drawn position to result.\r
+        *\r
+        * A position that was already drawn is rejected and the draw is repeated, so the call\r
+        * only returns if enough positions can still be reached; this is not checked here.\r
+        *\r
+        * @param result      output vector; the drawn values are appended to its current contents\r
+        * @param indices     candidate values, one per position\r
+        * @param num_samples number of positions to draw\r
+        * @param weights     sampling weight of each position\r
+        */\r
+       private void drawWithoutReplacementWeighted(Vector<Integer> result, Vector<Integer> indices,\r
+                       int num_samples, Vector<Double> weights) {\r
+           int i, j;\r
+           int draw = 0;\r
+           double sum_of_weight = 0;\r
+           double rand;\r
+\r
+           // Only reserve capacity. Growing the vector with setSize() would pad it with null\r
+           // entries and the add() calls below would then append the real draws behind them.\r
+           result.ensureCapacity(result.size() + num_samples);\r
+\r
+           // One flag per candidate position, all initially "not selected"\r
+           boolean selected[] = new boolean[indices.size()];\r
+\r
+           Random random = new Random();\r
+\r
+           for (i = 0; i < weights.size(); i++) {\r
+               sum_of_weight = sum_of_weight + weights.get(i);\r
+           }\r
+\r
+           for (i = 0; i < num_samples; i++) {\r
+               do {\r
+                   // Walk through the weights until the random offset falls into a bucket\r
+                   rand = sum_of_weight * random.nextDouble();\r
+                   for (j = 0; j < weights.size(); j++) {\r
+                       if (rand < weights.get(j)) {\r
+                           draw = j;\r
+                           break;\r
+                       }\r
+                       rand = rand - weights.get(j);\r
+                   }\r
+               } while (selected[draw]);\r
+               selected[draw] = true;\r
+               result.add(indices.get(draw));\r
+           }\r
+       }\r
+       \r
+       /**\r
+        * Draws num_samples distinct positions from 0..max_index at random, each position j\r
+        * being picked with probability proportional to weights.get(j), and appends the drawn\r
+        * positions to result.\r
+        *\r
+        * A position that was already drawn is rejected and the draw is repeated, so the call\r
+        * only returns if enough positions can still be reached; this is not checked here.\r
+        *\r
+        * @param result      output vector; the drawn positions are appended to its current contents\r
+        * @param max_index   largest position that may be drawn\r
+        * @param num_samples number of positions to draw\r
+        * @param weights     sampling weight of each position\r
+        */\r
+       private void drawWithoutReplacementWeighted(Vector<Integer> result, int max_index,\r
+                       int num_samples, Vector<Double> weights) {\r
+           int i, j;\r
+           int draw = 0;\r
+           double sum_of_weight = 0;\r
+           double rand;\r
+\r
+           // Only reserve capacity. Growing the vector with setSize() would pad it with null\r
+           // entries and the add() calls below would then append the real draws behind them.\r
+           result.ensureCapacity(result.size() + num_samples);\r
+\r
+           // One flag per position 0..max_index, all initially "not selected"\r
+           boolean selected[] = new boolean[max_index + 1];\r
+\r
+           Random random = new Random();\r
+\r
+           for (i = 0; i < weights.size(); i++) {\r
+               sum_of_weight = sum_of_weight + weights.get(i);\r
+           }\r
+\r
+           for (i = 0; i < num_samples; i++) {\r
+               do {\r
+                   // Walk through the weights until the random offset falls into a bucket\r
+                   rand = sum_of_weight * random.nextDouble();\r
+                   for (j = 0; j < weights.size(); j++) {\r
+                       if (rand < weights.get(j)) {\r
+                           draw = j;\r
+                           break;\r
+                       }\r
+                       rand = rand - weights.get(j);\r
+                   }\r
+               } while (selected[draw]);\r
+               selected[draw] = true;\r
+               result.add(draw);\r
+           }\r
+       }\r
        \r
+       /**\r
+        * Returns the key of class_count that has the largest count. If several keys share\r
+        * the largest count, one of them is chosen at random.\r
+        *\r
+        * @param class_count map from class value to the number of times it occurred\r
+        * @return the (possibly randomly chosen) most frequent class value\r
+        */\r
+       private double mostFrequentValue(HashMap<Double, Integer> class_count) {\r
+           Vector<Double> winners = new Vector<Double>();\r
+           int best_count = 0;\r
+\r
+           // Collect every key whose count equals the running maximum\r
+           for (Map.Entry<Double, Integer> entry : class_count.entrySet()) {\r
+               int count = entry.getValue();\r
+               double label = entry.getKey();\r
+               if (count < best_count) {\r
+                   continue;\r
+               }\r
+               if (count > best_count) {\r
+                   // New maximum: everything collected so far is beaten\r
+                   best_count = count;\r
+                   winners.clear();\r
+               }\r
+               winners.add(label);\r
+           }\r
+\r
+           if (winners.size() != 1) {\r
+               // Tie: pick one of the candidates at random\r
+               Random random = new Random();\r
+               return winners.get(random.nextInt(winners.size()));\r
+           }\r
+           return winners.get(0);\r
+       }\r
+       \r
+       /**\r
+        * Computes a concordance index over all pairs (i, j), i < j, of entries of sum_chf.\r
+        *\r
+        * A pair is skipped when the sample with the smaller time has status 0, or when both\r
+        * times and both status values are equal. Every remaining pair counts as permissible.\r
+        * A permissible pair adds 1 when the sample with the smaller time has the larger\r
+        * sum_chf value, and 0.5 when neither ordering applies and both sum_chf values are equal.\r
+        *\r
+        * @param data            data set the time and status values are read from\r
+        * @param sum_chf         one value per sample\r
+        * @param dependent_varID column of data holding the time\r
+        * @param status_varID    column of data holding the status\r
+        * @param sample_IDs      row of data for every entry of sum_chf; if empty, entry i is\r
+        *                        read from row i\r
+        * @return concordance divided by the number of permissible pairs; NaN (0.0 / 0.0)\r
+        *         when there is no permissible pair\r
+        */\r
+       private double computeConcordanceIndex(Data data, Vector<Double> sum_chf,\r
+                       int dependent_varID, int status_varID, Vector<Integer> sample_IDs) {\r
+           int i, j;\r
+           double concordance = 0.0;\r
+           double permissible = 0.0;\r
+           for (i = 0; i < sum_chf.size(); i++) {\r
+               int sample_i = i;\r
+               if (!sample_IDs.isEmpty()) {\r
+                   sample_i = sample_IDs.get(i);\r
+               }\r
+               double time_i = data.get(sample_i, dependent_varID);\r
+               double status_i = data.get(sample_i, status_varID);\r
+               // Unbox once: comparing the boxed Double elements with == would compare object\r
+               // identity instead of the numeric values, so ties would never be detected.\r
+               double chf_i = sum_chf.get(i);\r
+\r
+               for (j = i + 1; j < sum_chf.size(); ++j) {\r
+                   int sample_j = j;\r
+                   if (!sample_IDs.isEmpty()) {\r
+                       sample_j = sample_IDs.get(j);\r
+                   }\r
+                   double time_j = data.get(sample_j, dependent_varID);\r
+                   double status_j = data.get(sample_j, status_varID);\r
+\r
+                   // Pairs that cannot be ordered are not permissible\r
+                   if (time_i < time_j && status_i == 0) {\r
+                       continue;\r
+                   }\r
+                   if (time_j < time_i && status_j == 0) {\r
+                       continue;\r
+                   }\r
+                   if (time_i == time_j && status_i == status_j) {\r
+                       continue;\r
+                   }\r
+\r
+                   permissible += 1;\r
+\r
+                   double chf_j = sum_chf.get(j);\r
+                   if (time_i < time_j && chf_i > chf_j) {\r
+                       concordance += 1;\r
+                   } else if (time_j < time_i && chf_j > chf_i) {\r
+                       concordance += 1;\r
+                   } else if (chf_i == chf_j) {\r
+                       concordance += 0.5;\r
+                   }\r
+               }\r
+           }\r
+\r
+           return (concordance / permissible);\r
+       }\r
+       \r
+       /**\r
+        * Returns the decimal text representation of number.\r
+        *\r
+        * @param number value to convert\r
+        * @return number as a string\r
+        */\r
+       private String uintToString(int number) {\r
+           return Integer.toString(number);\r
+       }\r
+       \r
+       private String beautifyTime(int seconds) { // #nocov start\r
+                 String result;\r
+\r
+                 // Add seconds, minutes, hours, days if larger than zero\r
+                 int out_seconds = seconds % 60;\r
+                 result = uintToString(out_seconds) + " seconds";\r
+                 int out_minutes = (seconds / 60) % 60;\r
+                 if (seconds / 60 == 0) {\r
+                   return result;\r
+                 } else if (out_minutes == 1) {\r
+                   result = "1 minute, " + result;\r
+                 } else {\r
+                   result = uintToString(out_minutes) + " minutes, " + result;\r
+                 }\r
+                 int out_hours = (seconds / 3600) % 24;\r
+                 if (seconds / 3600 == 0) {\r
+                   return result;\r
+                 } else if (out_hours == 1) {\r
+                   result = "1 hour, " + result;\r
+                 } else {\r
+                   result = uintToString(out_hours) + " hours, " + result;\r
+                 }\r
+                 int out_days = (seconds / 86400);\r
+                 if (out_days == 0) {\r
+                   return result;\r
+                 } else if (out_days == 1) {\r
+                   result = "1 day, " + result;\r
+                 } else {\r
+                   result = uintToString(out_days) + " days, " + result;\r
+                 }\r
+                 return result;\r
+    } // #nocov end\r
+       \r
+       /**\r
+        * Splits input at every occurrence of split_string and appends the pieces to result.\r
+        * Does nothing when input is null.\r
+        *\r
+        * @param result       vector the pieces are appended to\r
+        * @param input        text to split; may be null\r
+        * @param split_string delimiter, matched literally\r
+        */\r
+       private void splitString(Vector<String> result, String input, String split_string) { // #nocov start\r
+           if (input == null) {\r
+               return;\r
+           }\r
+           // String.split() interprets its argument as a regular expression; quote the\r
+           // delimiter so that characters such as '.', '|' or '*' are matched literally.\r
+           String tokens[] = input.split(java.util.regex.Pattern.quote(split_string));\r
+           for (String token : tokens) {\r
+               result.add(token);\r
+           }\r
+       } // #nocov end\r
+\r
+       /**\r
+        * Fills first_part with 0..n_all-1, shuffles it, moves everything behind the first\r
+        * n_first entries into second_part and truncates first_part to n_first entries.\r
+        *\r
+        * @param first_part  receives the first n_first shuffled values\r
+        * @param second_part receives the remaining n_all - n_first shuffled values\r
+        * @param n_all       number of values to shuffle\r
+        * @param n_first     number of values that stay in first_part\r
+        */\r
+       private void shuffleAndSplit(Vector<Integer> first_part, Vector<Integer> second_part,\r
+                       int n_all, int n_first) {\r
+           // Identity sequence 0..n_all-1\r
+           first_part.setSize(n_all);\r
+           int value = 0;\r
+           while (value < n_all) {\r
+               first_part.set(value, value);\r
+               value++;\r
+           }\r
+           shuffle(first_part);\r
+\r
+           // Tail of the shuffled sequence goes to the second part\r
+           second_part.setSize(n_all - n_first);\r
+           for (int src = n_first, dst = 0; src < first_part.size(); src++, dst++) {\r
+               second_part.set(dst, first_part.get(src));\r
+           }\r
+\r
+           // Head of the shuffled sequence stays in the first part\r
+           first_part.setSize(n_first);\r
+       }\r
+       \r
+       private void shuffleAndSplitAppend(Vector<Integer> first_part, Vector<Integer> second_part, \r
+                       int n_all, int n_first, Vector<Integer> mapping) {\r
+                 int i, j;\r
+                 // Old end is start position for new data\r
+                 int first_old_size = first_part.size();\r
+                 int second_old_size = second_part.size();\r
+\r
+                 // Reserve space\r
+                 first_part.setSize(first_old_size + n_all);\r
+                 int first_start_pos = first_old_size;\r
+\r
+                 // Fill with 0..n_all-1 and shuffle\r
+                 Vector<Integer>fp2 = new Vector<Integer>();\r
+                 for (i = first_start_pos; i < first_part.size(); i++) {\r
+                         fp2.add(i - first_start_pos);\r
+                 }\r
+                 shuffle(fp2);\r
+                 for (i = first_start_pos; i < first_part.size(); i++) {\r
+                         first_part.set(i, fp2.get(i-first_start_pos));\r
+                 }\r
+\r
+                 // Mapping\r
+                 for (j = first_start_pos; j != first_part.size(); ++j) {\r
+                   first_part.set(j, mapping.get(first_part.get(j)));\r
+                 }\r
+\r
+                 // Copy to second part\r
+                 second_part.setSize(second_part.size() + n_all - n_first);\r
+                 int second_start_pos = second_old_size;\r
+                 for (i = first_start_pos + n_first; i < first_part.size(); i++) {\r
+                         second_part.set(i - first_start_pos - n_first + second_start_pos, \r
+                                         first_part.get(i));\r
+                 }\r
+\r
+                 // Resize first part\r
+                 first_part.setSize(first_old_size + n_first);\r
+    }\r
+\r
+       private String checkUnorderedVariables(Data data, Vector<String> unordered_variable_names) { // #nocov start\r
+               int i;  \r
+               int num_rows = data.getNumRows();\r
+                 Vector<Integer> sampleIDs = new Vector<Integer>();\r
+                 for (i = 0; i < num_rows; i++) {\r
+                         sampleIDs.add(i);\r
+                 }\r
+\r
+                 // Check for all unordered variables\r
+                 for (i = 0; i < unordered_variable_names.size(); i++) {\r
+                       String variable_name = unordered_variable_names.get(i);\r
+                   int varID = data.getVariableID(variable_name);\r
+                   Vector<Double> all_values = new Vector<Double>();\r
+                   data.getAllValues(all_values, sampleIDs, varID);\r
+\r
+                   // Check level count\r
+                   int max_level_count = 8 * 4 - 1;\r
+                   if (all_values.size() > max_level_count) {\r
+                     return "Too many levels in unordered categorical variable " + variable_name + ". Only "\r
+                         + uintToString(max_level_count) + " levels allowed on this system.";\r
+                   }\r
+\r
+                   // Check positive integers\r
+                   if (!checkPositiveIntegers(all_values)) {\r
+                     return "Not all values in unordered categorical variable " + variable_name + " are positive integers.";\r
+                   }\r
+                 }\r
+                 return "";\r
+    } // #nocov end\r
+       \r
+       /**\r
+        * Tells whether every entry of all_values is a whole number greater than or equal to 1.\r
+        *\r
+        * @param all_values values to check\r
+        * @return false as soon as one value is below 1 or has a fractional part, true otherwise\r
+        *         (also for an empty vector)\r
+        */\r
+       private boolean checkPositiveIntegers(Vector<Double> all_values) { // #nocov start\r
+           for (double value : all_values) {\r
+               boolean too_small = value < 1;\r
+               boolean fractional = Math.floor(value) != value;\r
+               if (too_small || fractional) {\r
+                   return false;\r
+               }\r
+           }\r
+           return true;\r
+       } // #nocov end\r
+       \r
+       /**\r
+        * Approximates the p-value of a maximally selected statistic b\r
+        * (named after Lausen 1992 in the method name).\r
+        *\r
+        * Side effect: the static field logprop is recomputed from minprop and maxprop and\r
+        * overwritten on every call that gets past the b < 1 shortcut.\r
+        *\r
+        * @param b       observed value of the statistic\r
+        * @param minprop lower proportion bound\r
+        * @param maxprop upper proportion bound\r
+        * @return 1.0 if b < 1, otherwise the approximation, never less than 0\r
+        */\r
+       private double maxstatPValueLau92(double b, double minprop, double maxprop) {\r
+           if (b < 1) {\r
+               return 1.0;\r
+           }\r
+\r
+           // Stored in the shared static field; recomputed on each call\r
+           double numerator = maxprop * (1 - minprop);\r
+           double denominator = (1 - maxprop) * minprop;\r
+           logprop = Math.log(numerator / denominator);\r
+\r
+           double density = dstdnorm(b);\r
+           double p = 4 * density / b + density * (b - 1 / b) * logprop;\r
+\r
+           // Negative (or NaN) approximations are reported as 0\r
+           return (p > 0) ? p : 0;\r
+       }\r
+\r
+       \r
+       private double maxstatPValueLau94(double b, double minprop, double maxprop, int N, Vector<Integer> m) {\r
+\r
+                 double D = 0;\r
+                 for (int i = 0; i < m.size() - 1; ++i) {\r
+                   double m1 = m.get(i);\r
+                   double m2 = m.get(i + 1);\r
+\r
+                   double t = Math.sqrt(1.0 - m1 * (N - m2) / ((N - m1) * m2));\r
+                   D += 1 / Math.PI * Math.exp(-b * b / 2) * (t - (b * b / 4 - 1) * (t * t * t) / 6);\r
+                 }\r
+\r
+                 return 2 * (1 - pstdnorm(b)) + D;\r
+    }\r
+       \r
+       /**\r
+        * Returns twice the standard normal distribution function evaluated at -b.\r
+        *\r
+        * @param b observed value of the statistic\r
+        * @return 2 * pstdnorm(-b)\r
+        */\r
+       private double maxstatPValueUnadjusted(double b) {\r
+           double lower_tail = pstdnorm(-b);\r
+           return 2 * lower_tail;\r
+       }\r
+\r
+       /**\r
+        * Density of the standard normal distribution.\r
+        *\r
+        * @param x point at which the density is evaluated\r
+        * @return exp(-x^2 / 2) / sqrt(2 * pi)\r
+        */\r
+       private double dstdnorm(double x) {\r
+           double kernel = Math.exp(-0.5 * x * x);\r
+           double normalizer = Math.sqrt(2 * Math.PI);\r
+           return kernel / normalizer;\r
+       }\r
+\r
+       /**\r
+        * Distribution function of the standard normal distribution, expressed through erf.\r
+        *\r
+        * @param x point at which the distribution function is evaluated\r
+        * @return 0.5 * (1 + erf(x / sqrt(2)))\r
+        */\r
+       private double pstdnorm(double x) {\r
+           double scaled = x / Math.sqrt(2.0);\r
+           return 0.5 * (1 + erf(scaled));\r
+       }\r
+       \r
+       \r
+       // Error function erf(x) from Computation of Special Functions by \r
+       // Shanjie Zhang and Jianming Jin. pp. 622-623.\r
+       private double erf(double x) {\r
+               double eps = 1.0E-15;\r
+               double x2, er, r, c0, err;\r
+               int k;\r
+               x2 = x * x;\r
+               if (Math.abs(x) < 3.5) {\r
+                       er = 1.0;\r
+                       r = 1.0;\r
+                       for (k = 1; k <= 50; k++) {\r
+                               r = r*x2/(k + 0.5);\r
+                               er = er + r;\r
+                               if (Math.abs(r) <= Math.abs(er)*eps) {\r
+                                       break;\r
+                               }\r
+                       } // for (k = 1; k <= 50; k++)\r
+                       c0 = 2.0/Math.sqrt(Math.PI) * x * Math.exp(-x2);\r
+                       err = c0 * er;\r
+               } // if (Math.abs(x) < 3.5)\r
+               else {\r
+                       er = 1.0;\r
+                       r = 1.0;\r
+                       for (k = 1; k <= 12; k++) {\r
+                               r = -r*(k - 0.5)/x2;\r
+                               er = er + r;\r
+                       }\r
+                       c0 = Math.exp(-x2)/(Math.abs(x) * Math.sqrt(Math.PI));\r
+                       err = 1.0 - c0 * er;\r
+                       if (x < 0.0) err = -err;\r
+               } // else\r
+               return err;\r
+       }\r
+\r
+       /**\r
+        * Adjusts a set of p-values for multiple testing with a step-up procedure\r
+        * (this looks like the Benjamini-Hochberg adjustment; confirm against the ported code).\r
+        *\r
+        * The p-values are visited from largest to smallest. The largest one is kept; the one at\r
+        * sorted position i becomes the minimum of the previous adjusted value and\r
+        * n / (n - i) times the unadjusted value, n being the number of p-values.\r
+        *\r
+        * @param unadjusted_pvalues raw p-values\r
+        * @return adjusted p-values in the same order as the input; empty if the input is empty\r
+        */\r
+       private Vector<Double> adjustPvalues(Vector<Double> unadjusted_pvalues) {\r
+           int i;\r
+           int idx, idx_last;\r
+           int num_pvalues = unadjusted_pvalues.size();\r
+           Vector<Double> adjusted_pvalues = new Vector<Double>();\r
+\r
+           // Nothing to adjust; without this guard indices.get(0) below would throw\r
+           if (num_pvalues == 0) {\r
+               return adjusted_pvalues;\r
+           }\r
+\r
+           for (i = 0; i < num_pvalues; i++) {\r
+               adjusted_pvalues.add(0.0);\r
+           }\r
+\r
+           // Get order of p-values, largest first\r
+           ArrayList<indexValueItem> ivList = new ArrayList<indexValueItem>();\r
+           for (i = 0; i < num_pvalues; i++) {\r
+               ivList.add(new indexValueItem(i, unadjusted_pvalues.get(i)));\r
+           }\r
+           Collections.sort(ivList, new indexValueDescendingComparator());\r
+           Vector<Integer> indices = new Vector<Integer>();\r
+           for (i = 0; i < num_pvalues; i++) {\r
+               indices.add(ivList.get(i).getIndex());\r
+           }\r
+\r
+           // Compute adjusted p-values; the largest one stays as it is\r
+           adjusted_pvalues.set(indices.get(0), unadjusted_pvalues.get(indices.get(0)));\r
+           for (i = 1; i < indices.size(); ++i) {\r
+               idx = indices.get(i);\r
+               idx_last = indices.get(i - 1);\r
+\r
+               adjusted_pvalues.set(idx, Math.min(adjusted_pvalues.get(idx_last),\r
+                       (double) num_pvalues / (double) (num_pvalues - i) * unadjusted_pvalues.get(idx)));\r
+           }\r
+           return adjusted_pvalues;\r
+       }\r
+       \r
+       private class indexValueComparator implements Comparator<indexValueItem> {\r
+                // Sort in ascending order\r
+                public int compare(final indexValueItem o1, final indexValueItem o2) {\r
+                   final double a = o1.getValue();\r
+                   final double b = o2.getValue();\r
+\r
+                   if (a < b) {\r
+                       return -1;\r
+                   } else if (a > b) {\r
+                       return 1;\r
+                   } else {\r
+                       return 0;\r
+                   }\r
+               }       \r
+       }\r
+       \r
+       private class indexValueDescendingComparator implements Comparator<indexValueItem> {\r
+                // Sort in descending order\r
+                public int compare(final indexValueItem o1, final indexValueItem o2) {\r
+                   final double a = o1.getValue();\r
+                   final double b = o2.getValue();\r
+\r
+                   if (a < b) {\r
+                       return 1;\r
+                   } else if (a > b) {\r
+                       return -1;\r
+                   } else {\r
+                       return 0;\r
+                   }\r
+               }       \r
+       }\r
+       \r
+       /**\r
+        * Immutable pair of a position and the value found at that position. Sorting a list of\r
+        * these items by value and reading the positions back yields the sort order of the values.\r
+        */\r
+       private class indexValueItem {\r
+\r
+        /** Position the value was taken from. */\r
+        private final int index;\r
+\r
+        /** Value the items are ordered by. */\r
+        private final double value;\r
+\r
+        /**\r
+         * Creates a new indexValueItem object.\r
+         * \r
+         * @param index position the value was taken from\r
+         * @param value value stored for that position\r
+         */\r
+        public indexValueItem(final int index, final double value) {\r
+            this.index = index;\r
+            this.value = value;\r
+        }\r
+\r
+        /**\r
+         * Returns the position passed to the constructor.\r
+         * \r
+         * @return the stored position\r
+         */\r
+        public int getIndex() {\r
+            return index;\r
+        }\r
+\r
+        /**\r
+         * Returns the value passed to the constructor.\r
+         * \r
+         * @return the stored value\r
+         */\r
+        public double getValue() {\r
+            return value;\r
+        }\r
+\r
+    }\r
+       \r
+       \r
+       /**\r
+        * Computes one score per sample from its time and status.\r
+        *\r
+        * The samples are visited in order of increasing time. Samples that share the same time\r
+        * are handled as one group: when the group ends at sorted position i, status / (n - i)\r
+        * of every group member is added to a running sum, and each member then receives its\r
+        * status minus that running sum.\r
+        *\r
+        * @param time   time of every sample\r
+        * @param status status of every sample, same length as time\r
+        * @return scores in the same order as the input\r
+        */\r
+       private Vector<Double> logrankScores(Vector<Double> time, Vector<Double> status) {\r
+           int i, j;\r
+           int n = time.size();\r
+           Vector<Double> scores = new Vector<Double>();\r
+           scores.setSize(n);\r
+\r
+           // Get order of timepoints\r
+           ArrayList<indexValueItem> ivList = new ArrayList<indexValueItem>();\r
+           for (i = 0; i < n; i++) {\r
+               ivList.add(new indexValueItem(i, time.get(i)));\r
+           }\r
+           Collections.sort(ivList, new indexValueComparator());\r
+           Vector<Integer> indices = new Vector<Integer>();\r
+           for (i = 0; i < n; i++) {\r
+               indices.add(ivList.get(i).getIndex());\r
+           }\r
+\r
+           // Compute scores\r
+           double cumsum = 0;\r
+           int last_unique = -1;\r
+           for (i = 0; i < n; ++i) {\r
+\r
+               // Continue if next value is the same. The times are unboxed first: applying ==\r
+               // to two Double objects compares references, so ties would never be recognized.\r
+               if (i < n - 1) {\r
+                   double current_time = time.get(indices.get(i));\r
+                   double next_time = time.get(indices.get(i + 1));\r
+                   if (current_time == next_time) {\r
+                       continue;\r
+                   }\r
+               }\r
+\r
+               // Compute sum and scores for all non-unique values in a row\r
+               for (j = last_unique + 1; j <= i; ++j) {\r
+                   cumsum += status.get(indices.get(j)) / (n - i);\r
+               }\r
+               for (j = last_unique + 1; j <= i; ++j) {\r
+                   scores.set(indices.get(j), status.get(indices.get(j)) - cumsum);\r
+               }\r
+\r
+               // Save last computed value\r
+               last_unique = i;\r
+           }\r
+\r
+           return scores;\r
+       }\r
+\r
+\r
        public void runAlgorithm() {\r
                /* Empty for now: nothing is executed yet. Presumably the AlgorithmBase entry point; confirm against AlgorithmBase. */\r
        }\r