select most frequent element in dataframe while using table

Question 1

Using table (and lapply):

# Summarise one word table: for every distinct word, report its most frequent
# pronunciation (ties are broken at random by max.col) together with the
# total number of times the word occurs.
#
# pronounce: vector of pronunciations, parallel to `word`
# word:      vector of words
# Returns a data.frame with columns pronounciation, words, occurences
# (one row per distinct word, in the row order of the contingency table).
ff <- function(pronounce, word) {
  # Rows are words, columns are pronunciations, cells are pair counts.
  counts <- table(word, pronounce)
  # Column index of the largest count in each row.
  top_col <- max.col(counts, ties.method = "random")
  data.frame(
    pronounciation = colnames(counts)[top_col],
    words = rownames(counts),
    occurences = unname(rowSums(counts))
  )
}

# Apply ff to every data frame in the list `ls`, pairing each table's
# pronounciation column with its words column.
lapply(
  ls,
  function(tbl) ff(tbl$pronounciation, tbl$words)
)

#[[1]]
#  pronounciation words occurences
#1            hɪm   him          4
#2            ðɛm  them          1
#3            juː   you          7
#
#[[2]]
#  pronounciation words occurences
#1             eɪ     a          8
#2             ði   the          7
#
#[[3]]
#  pronounciation words occurences
#1            wʌn   one          6
#2           θɹiː three          1
#3            tuː   two          8

Question 2

Here is a solution using data.table that I think gets what you were originally after: here, occurrence is the total number of appearances of each word, not the count of each (word, pronunciation) pair:

# Bundle the three word-category tables into one list, then run setDT on
# each element so the data.table syntax used afterwards is available.
dtlist <- list(pronouns, articles, numbers)
lapply(X = dtlist, FUN = setDT)

# Return the most frequent value of `x` as a character string.
# When several values share the highest count, one of them is picked
# uniformly at random.
#
# x: a vector (NA values are ignored by table()).
# Returns a length-1 character vector, or NA_character_ when `x` has no
# non-missing values.
common_r <- function(x) {
  counts <- table(x)
  # Nothing to count: avoid max() on an empty table (warning and -Inf).
  if (length(counts) == 0L) {
    return(NA_character_)
  }
  modes <- names(counts)[counts == max(counts)]
  # Only draw when there is a real tie; a single mode is returned as is.
  if (length(modes) > 1L) sample(modes, 1L) else modes
}
# For every table in dtlist, build one row per word holding the total number
# of appearances of the word and its most common pronunciation (as chosen by
# common_r), then put the columns in the order pronunciation, words, occurrence.
lapply(dtlist, function(x) {
  per_word <- x[, .(occurrence = .N,
                    pronunciation = common_r(pronunciation)),
                by = words]
  # The column-order vector is the second argument of setcolorder; the
  # original closed setcolorder() before it, leaving unbalanced parentheses.
  setcolorder(per_word, c("pronunciation", "words", "occurrence"))
})

Output:

[[1]]
   pronunciation words occurrence
1:           juː   you          7
2:           hɪm   him          4
3:           ðɛm  them          1

[[2]]
   pronunciation words occurrence
1:            ði   the          7
2:            eɪ     a          8

[[3]]
   pronunciation words occurrence
1:           wʌn   one          6
2:           tuː   two          8
3:          θɹiː three          1

Note that I've taken care to randomize when the most common pronunciation is not unique; if it's always unique (or if you don't care which pronunciation is chosen in this case), this can be simplified:

# Simplified variant: return the most frequent value of `x` as a character
# string, taking the first entry of the count table sorted in decreasing
# order (no random tie-breaking).
common_r <- function(x) {
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  names(sort(table(x), decreasing = TRUE))[1]
}

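And the output can further be simplified if you don't want to carry around 3 separate lists for the different word categories by wrapping the lapply call in rbindlist, i.e. rbindlist(lapply(dtlist, ...)), which stacks the per-category results into a single data.table: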

   pronunciation words occurrence
1:           juː   you          7
2:           hɪm   him          4
3:           ðɛm  them          1
4:            ði   the          7
5:            eɪ     a          8
6:           wʌn   one          6
7:           tuː   two          8
8:          θɹiː three          1

We could also add a category field to this new data.table saying which word category each row came from, for example by naming the list elements and passing idcol = "category" to rbindlist.

Question 3

Using the data.table library -

library(data.table)

# Collect the three word tables and run setDT on each of them.
dtlist <- list(pronouns, articles, numbers)
lapply(X = dtlist, FUN = setDT)

# Step 1: in every table, count how often each (pronunciation, words) pair
# occurs; the count is stored in the column `freq`.
# NOTE(review): the printed output further down spells the column
# `pronounciation` - confirm which spelling your data actually uses.
dtlistfreq1 <- lapply(
  dtlist,
  function(dt) dt[, .(freq = .N), by = .(pronunciation, words)]
)

# Step 2: within each word keep only the row with the highest pair count
# (which.max returns the first maximum, so ties are not randomised).
dtlistfreq2 <- lapply(
  dtlistfreq1,
  function(dt) dt[, .SD[which.max(freq)], by = words]
)

Output

> dtlistfreq2 
[[1]]
   words pronounciation freq
1:   you            juː    3
2:   him            hɪm    4
3:  them            ðɛm    1

[[2]]
   words pronounciation freq
1:   the             ði    3
2:     a             eɪ    5

[[3]]
   words pronounciation freq
1:   one            wʌn    4
2:   two            tuː    6
3: three           θɹiː    1