Question

I am writing code to calculate the Shannon entropy of a string.

 ' Text whose entropy is computed, read from the Result control.
 Dim entropytext As String = Result.Text

    ' Count the occurrences of each character, then bucket the characters by that
    ' count: each resulting group has the occurrence count as its Key and contains
    ' every character that occurs exactly that many times. Highest counts come first.
    Dim theresult = entropytext.GroupBy(Function(o) o) _
        .Select(Function(o) New With {.Count = o.Count(), .Character = o.Key}) _
        .GroupBy(Function(o) o.Count, Function(o) o.Character) _
        .OrderByDescending(Function(o) o.Key)

    ' Running sum of p * log(p) over all characters (negated after the loop).
    Dim totalEntropy As Double = 0
    ' Contribution of the current group of equally frequent characters.
    Dim partialEntropy As Double
    ' Probability of a single character in the current group.
    Dim partialP As Double

    For Each item In theresult
        Console.Write(item.Key & " of chars: ")

        ' List the characters that share this occurrence count.
        For Each character In item
            Console.Write(character)
        Next

        ' item.Key is the occurrence count; dividing by the text length gives the
        ' probability of each character in this group.
        partialP = item.Key / entropytext.Count
        Console.Write(". p of each " & partialP & ", total p = " & item.Count * partialP)
        ' Every character in the group contributes the same p * log(p) term, so the
        ' term is multiplied by the number of characters in the group (item.Count).
        ' NOTE(review): Math.Log with one argument is the natural logarithm, so this
        ' sum is in nats, not bits; dividing by Math.Log(2) would convert it to bits.
        partialEntropy = partialP * Math.Log(partialP) * item.Count
        totalEntropy += partialEntropy
        Console.WriteLine()
    Next

    ' Entropy is the negated sum of p * log(p).
    totalEntropy *= -1
    TextBox1.Text = totalEntropy & " Bits"
End Sub

Math:

$$\text{Entropy} = -\sum_x P_x \log(P_x)$$
$$P_x = \frac{N_x}{\sum_x N_x}$$

where P_x is the probability of letter x,

and N_x is the number of occurrences of letter x.

So,

textbox1 ='AATC'

Entropy (textbox1)=-([2/4 log(2/4)]+[1/4 log (1/4)]+[1/4 log (1/4)])
= 1.0397

But this is far too low... according to (http://www.shannonentropy.netmark.pl/) it should be "1.5". What am I doing wrong? Thank you in advance!!

Essentially, it should work like the following C# code, but I am not proficient in C#:

/// <summary>
/// Computes the Shannon entropy of a string, in bits per character.
/// </summary>
/// <param name="s">The text to analyse. May be empty, but not null.</param>
/// <returns>
/// The entropy -sum(p * log2(p)) over the distinct characters of <paramref name="s"/>,
/// where p is the relative frequency of each character. An empty string yields 0.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
public static double ShannonEntropy(string s)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // Count how many times each character occurs.
    var map = new Dictionary<char, int>();
    foreach (char c in s)
    {
        // TryGetValue leaves count at 0 for a character not seen before,
        // so a single lookup covers both the "new" and "existing" cases.
        int count;
        map.TryGetValue(c, out count);
        map[c] = count + 1;
    }

    double result = 0.0;
    int len = s.Length;
    foreach (var item in map)
    {
        var frequency = (double)item.Value / len;
        // Math.Log is the natural logarithm; dividing by Math.Log(2)
        // converts it to base 2 so the result is expressed in bits.
        result -= frequency * (Math.Log(frequency) / Math.Log(2));
    }

    return result;
}
Was it helpful?

Solution

Here is a direct port of the C# code into VB.NET:

''' <summary>
''' Computes the Shannon entropy of a string, in bits per character.
''' </summary>
''' <param name="s">The text to analyse. May be empty, but not Nothing.</param>
''' <returns>
''' The entropy -sum(p * log2(p)) over the distinct characters of <paramref name="s"/>,
''' where p is the relative frequency of each character. An empty string yields 0.
''' </returns>
''' <exception cref="ArgumentNullException"><paramref name="s"/> is Nothing.</exception>
Public Shared Function ShannonEntropy(s As String) As Double
    If s Is Nothing Then
        Throw New ArgumentNullException(NameOf(s))
    End If

    ' Count how many times each character occurs.
    Dim map = New Dictionary(Of Char, Integer)()
    For Each c As Char In s
        ' TryGetValue leaves count at 0 for a character not seen before,
        ' so a single lookup covers both the "new" and "existing" cases.
        Dim count As Integer = 0
        map.TryGetValue(c, count)
        map(c) = count + 1
    Next

    Dim result As Double = 0.0
    Dim len As Integer = s.Length
    ' "var" is a C# keyword, not a VB type; the element type is spelled out instead.
    For Each item As KeyValuePair(Of Char, Integer) In map
        Dim frequency = CDbl(item.Value) / len
        ' Math.Log is the natural logarithm; dividing by Math.Log(2)
        ' converts it to base 2 so the result is expressed in bits.
        result -= frequency * (Math.Log(frequency) / Math.Log(2))
    Next

    Return result
End Function

If the C# code produces the results you were looking for, this code will give the same results.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top