55 Customized Sorting in R

Xuchen Wang

The idea of this package comes from the lack of sorting function in R, which uses the order function to sort and really caused some confusion for me. In addition, user defined functions are not applicable, so I would like to implement the sort function by myself.

I am using the idea of merge sort for these sorting functions which runs in O(nlogn).

This file contains two functions, aiming to solve the following question

  1. sort a list with user-defined comparison function

eg:

lst=[[2,2,8],[3,6,7],[9,5,2]],

compare<-function(x,y) { x[[2]]<y[[2]] }

lst<-sort(lst,compare)

lst would be [[2,2,8],[9,5,2],[3,6,7]] now

lstmerge<-function(l1,l2,comp) {
  m<-length(l1)
  n<-length(l2)
  i<-1
  j<-1
  output<-vector("list",m+n)
  while(i<=m&&j<=n) {
    if(comp(l1[[i]],l2[[j]])) {
      output[[i+j-1]]<-l1[[i]]
      i<-i+1
    } else {
      output[[i+j-1]]<-l2[[j]]
      j<-j+1
    }
  }
  while(i<=m) {
     output[[i+j-1]]<-l1[[i]]
    i<-i+1
  }
   while(j<=n) {
     output[[i+j-1]]<-l2[[j]]
    j<-j+1
   }
  output
}

lstsort<-function(lst,comp) {
  n<-length(lst)
  if(n<=1) {
    return(lst)
  }
  mid<-ceiling(n/2)
  lft<-lstsort(lst[(1:mid)],comp)
  rt<-lstsort(lst[((mid+1):n)],comp)
  return(lstmerge(lft,rt,comp))
  
}

example:

lst= lapply(1:100, function(x) as.list(sample(1:100,size=2)))
sapply(lst,function(x) x[[2]])
##   [1]  22  73  90  13  30  50  81  67  33  27  12  89  53  55  97  82  76  13
##  [19]  99  89   8  36  66  50  63  45  91  25  15  42  54  23  81  11  40  95
##  [37]  37  30  31  32  79  91  54  56  54  55  31  82  37  11  99  52   7  78
##  [55]  23  14  72  75  89  69  65  25  64  39  79  84  31  57  49  47  20  97
##  [73]  78  42  79  66  29  43  75  22  70  91  94  62  57  84  35  10  38  46
##  [91] 100  14   4  20  96  94  94  61  97  38
compare<-function(x,y) {
  x[[2]]<y[[2]]
}

lst<-lstsort(lst,compare)
sapply(lst,function(x) x[[2]])
##   [1]   4   7   8  10  11  11  12  13  13  14  14  15  20  20  22  22  23  23
##  [19]  25  25  27  29  30  30  31  31  31  32  33  35  36  37  37  38  38  39
##  [37]  40  42  42  43  45  46  47  49  50  50  52  53  54  54  54  55  55  56
##  [55]  57  57  61  62  63  64  65  66  66  67  69  70  72  73  75  75  76  78
##  [73]  78  79  79  79  81  81  82  82  84  84  89  89  89  90  91  91  91  94
##  [91]  94  94  95  96  97  97  97  99  99 100

to sort a list of vector just change the compare function:

lst= lapply(1:100, function(x) sample(1:100,size=2))
sapply(lst,function(x) x[2])
##   [1] 85 55 62 17 49 92 29 62 32 30  1  6 35  4 14 50 67 49  3 26 17 97 30 32 44
##  [26] 62 48 87 69 28 17 35  3 92 11 17 66 18 41  6 45 66 16 17 46 65 49 55  1 94
##  [51] 13 93 16 34 18  3 25 44 54  2 80 87 30 71 74 37 48 66 95  6 59 67 72  6 41
##  [76] 69 14 59 95 52 58 15 34 48 24 24 86 40 25 15 87 55 74  2 56  4 42 10  6 64
compare2<-function(x,y) {
  x[2]<y[2]
}
lst<-lstsort(lst,compare2)
sapply(lst,function(x) x[2])
##   [1]  1  1  2  2  3  3  3  4  4  6  6  6  6  6 10 11 13 14 14 15 15 16 16 17 17
##  [26] 17 17 17 18 18 24 24 25 25 26 28 29 30 30 30 32 32 34 34 35 35 37 40 41 41
##  [51] 42 44 44 45 46 48 48 48 49 49 49 50 52 54 55 55 55 56 58 59 59 62 62 62 64
##  [76] 65 66 66 66 67 67 69 69 71 72 74 74 80 85 86 87 87 87 92 92 93 94 95 95 97
  1. sort a dataframe with user-defined function

The logic is the same as above, the only thing to notice is that the comparison function takes two rows and compare them

dfmerge<-function(d1,d2,comp) {
  m<-nrow(d1)
  n<-nrow(d2)
  i<-1
  j<-1
  output<-data.frame(matrix(NA,nrow=m+n,ncol=length(colnames(d1))))
  colnames(output)<-colnames(d1)
 while(i<=m&&j<=n) {
    if(comp(d1[i,],d2[j,])) {
      output[(i+j-1),]<-d1[i,]
      i<-i+1
    } else {
      output[(i+j-1),]<-d2[j,]
      j<-j+1
    }
  }
  while(i<=m) {
     output[(i+j-1),]<-d1[i,]
      i<-i+1
  }
   while(j<=n) {
    output[(i+j-1),]<-d2[j,]
      j<-j+1
   }
  output
  
}

dfsort<-function(df,comp) {
  n<-nrow(df)
  if(n<=1) {
    return(df)
  }
  mid<-ceiling(n/2)
  lft<-dfsort(df[1:mid,],comp)
  rt<-dfsort(df[(mid+1):n,],comp)
  return(dfmerge(lft,rt,comp))
  
}
M<-matrix(sample(1:100,300,replace=TRUE),ncol=3)
df<-data.frame(M)
colnames(df)<-c("a","b","c")
df
##       a   b   c
## 1    64  59  27
## 2    63  94  97
## 3    34  65  39
## 4    49   8  10
## 5    57  64  31
## 6    26  91  46
## 7    13  83  23
## 8    16  15  28
## 9    41  23   4
## 10   83  53  32
## 11   75  52  84
## 12   66  99  82
## 13    6  74  86
## 14   70  18   5
## 15   56  70   8
## 16   61  85  44
## 17   41  76  59
## 18   88  18  42
## 19   27  32  72
## 20   49  62  89
## 21   66  94  87
## 22    8  48  76
## 23   45  66  82
## 24    5   2  49
## 25   19  38  31
## 26   61  54  59
## 27   31  10  37
## 28   55  84  54
## 29   77  91   9
## 30   21  84  38
## 31   50   5  50
## 32    6  26  86
## 33  100   3  11
## 34   41  45  31
## 35   57  94  80
## 36   13  17  10
## 37   13  17  18
## 38   25  99  65
## 39   56  74  46
## 40   81 100  84
## 41    9  63  91
## 42   70  71  80
## 43   80  39  26
## 44   73  66  17
## 45   91  29  97
## 46  100  74  22
## 47   36  46  89
## 48   92  87  30
## 49   83  49  55
## 50    5  99  20
## 51   36   8  95
## 52   91  74  17
## 53   47  93 100
## 54   59  95  46
## 55   88  89  90
## 56   30  40  44
## 57   34   4  92
## 58   63  63  13
## 59   73  78  64
## 60    2  46   2
## 61    9  57  85
## 62   46  21  44
## 63   96  27  26
## 64    6   9  75
## 65    5  41  26
## 66    4  82  82
## 67   12  16  78
## 68   89  56  17
## 69   55  32  94
## 70    7  69  24
## 71   90   1  35
## 72   69   7  43
## 73   24  50  92
## 74   52   8  28
## 75   95  70  73
## 76   97  47  64
## 77   17  90  36
## 78    8  38  34
## 79   70  47  11
## 80   33  70 100
## 81   91  76  63
## 82   70  20  22
## 83   55  14  88
## 84   38  14  32
## 85   72  31  97
## 86   60  75  42
## 87   89  62  91
## 88   86   6  19
## 89   37  56  87
## 90   21  66  76
## 91   54  39  90
## 92   20  62  12
## 93   53  98  30
## 94   27  84  39
## 95   70  95  41
## 96   99  28  73
## 97    6  41  37
## 98   33  53  72
## 99   35  80  92
## 100  85  91  69
dfcomp<-function(r1,r2) {
  r1["b"][1]<r2["b"][1]
}


df<-dfsort(df,dfcomp)
df
##       a   b   c
## 1    90   1  35
## 2     5   2  49
## 3   100   3  11
## 4    34   4  92
## 5    50   5  50
## 6    86   6  19
## 7    69   7  43
## 8    52   8  28
## 9    36   8  95
## 10   49   8  10
## 11    6   9  75
## 12   31  10  37
## 13   38  14  32
## 14   55  14  88
## 15   16  15  28
## 16   12  16  78
## 17   13  17  18
## 18   13  17  10
## 19   88  18  42
## 20   70  18   5
## 21   70  20  22
## 22   46  21  44
## 23   41  23   4
## 24    6  26  86
## 25   96  27  26
## 26   99  28  73
## 27   91  29  97
## 28   72  31  97
## 29   55  32  94
## 30   27  32  72
## 31    8  38  34
## 32   19  38  31
## 33   54  39  90
## 34   80  39  26
## 35   30  40  44
## 36    6  41  37
## 37    5  41  26
## 38   41  45  31
## 39    2  46   2
## 40   36  46  89
## 41   70  47  11
## 42   97  47  64
## 43    8  48  76
## 44   83  49  55
## 45   24  50  92
## 46   75  52  84
## 47   33  53  72
## 48   83  53  32
## 49   61  54  59
## 50   37  56  87
## 51   89  56  17
## 52    9  57  85
## 53   64  59  27
## 54   20  62  12
## 55   89  62  91
## 56   49  62  89
## 57   63  63  13
## 58    9  63  91
## 59   57  64  31
## 60   34  65  39
## 61   21  66  76
## 62   73  66  17
## 63   45  66  82
## 64    7  69  24
## 65   33  70 100
## 66   95  70  73
## 67   56  70   8
## 68   70  71  80
## 69   91  74  17
## 70  100  74  22
## 71   56  74  46
## 72    6  74  86
## 73   60  75  42
## 74   91  76  63
## 75   41  76  59
## 76   73  78  64
## 77   35  80  92
## 78    4  82  82
## 79   13  83  23
## 80   27  84  39
## 81   21  84  38
## 82   55  84  54
## 83   61  85  44
## 84   92  87  30
## 85   88  89  90
## 86   17  90  36
## 87   85  91  69
## 88   77  91   9
## 89   26  91  46
## 90   47  93 100
## 91   57  94  80
## 92   66  94  87
## 93   63  94  97
## 94   70  95  41
## 95   59  95  46
## 96   53  98  30
## 97    5  99  20
## 98   25  99  65
## 99   66  99  82
## 100  81 100  84