Question

I am trying to read a string dataset from an HDF5 file in C# into an array of strings. I was able to read a dataset using the following code:

//read the no of rows and columns
// Open the "dimensions" dataset and query the datatype it is stored with.
var datasetID = H5D.open(fileId,"dimensions");
var dataTypeId = H5D.getType(datasetID);
// dataType (the type class) is not used again within this snippet.
var dataType = H5T.getClass(dataTypeId);
// NOTE(review): the array length below comes from H5T.getSize(dataTypeId), which is a
// property of the datatype, not of the dataspace - confirm it really matches the number
// of values stored in the dataset.
var length = H5T.getSize(dataTypeId);
int[] dDim = new int[length];

// Read the dataset contents into dDim using the dataset's own datatype.
H5D.read(datasetID, dataTypeId, new H5Array<int>(dDim));

I tried to do the same for a string dataset, but all the values come back initialized to null. So I referred to this link (https://www.mail-archive.com/hdf-forum@hdfgroup.org/msg02980.html). I was able to read them as bytes, but I don't know what size the byte array should be initialized to. The code I have right now to read the strings is this:

//read string
// datasetID and dataTypeId are assigned without 'var': they are the variables declared
// by the earlier "dimensions" snippet and are re-used here.
datasetID = H5D.open(fileId, "names");
var dataSpaceId = H5D.getSpace(datasetID);
// dims[0] (extent of the first dimension) is used below to size the byte buffer.
long[] dims = H5S.getSimpleExtentDims(dataSpaceId);
// The read below uses a copy of the predefined C_S1 type instead of the dataset's own type.
dataTypeId = H5T.copy(H5T.H5Type.C_S1);

//hard coding the no of string to read (213)
// The buffer holds dims[0] * 213 bytes; 213 is the hard-coded value the question asks how
// to determine.
byte[] buffer = new byte[dims[0]*213]; 
Console.WriteLine(dims[0]);
H5D.read(datasetID, dataTypeId, new H5Array<byte>(buffer));
Console.WriteLine(System.Text.ASCIIEncoding.ASCII.GetString(buffer));
Was it helpful?

Solution

If you do not know in advance what your data type will be, try the following code. It does not cover every data type, but it is easy to extend:

/// <summary>
/// Reads a one-dimensional dataset whose element type is not known in advance.
/// The raw bytes are read with the dataset's own datatype and then converted to a
/// .NET array chosen from the datatype's class and element size.
/// </summary>
/// <param name="fileId">Handle of the HDF5 file that contains the dataset.</param>
/// <param name="dataSetName">Name of the dataset to open.</param>
/// <returns>
/// A <c>string[]</c> for string datasets (trailing '\0' padding removed),
/// <c>float[]</c>/<c>double[]</c> for 4/8-byte floats, or
/// <c>Int16[]</c>/<c>Int32[]</c>/<c>Int64[]</c> for 2/4/8-byte integers.
/// Integers are always mapped to signed .NET types; the stored sign is not inspected.
/// </returns>
/// <exception cref="NotSupportedException">
/// The datatype class, element size or character set is not one of the cases handled here.
/// </exception>
/// <exception cref="OverflowException">
/// The dataset is too large to fit into a single managed byte array.
/// </exception>
public static Array Read1DArray(this H5FileId fileId, string dataSetName)
    {
        // Every handle is released in a finally block so that a failing read or an
        // unsupported datatype does not leak HDF5 identifiers.
        var dataset = H5D.open(fileId, dataSetName);
        try
        {
            var space = H5D.getSpace(dataset);
            try
            {
                var dtype = H5D.getType(dataset);
                try
                {
                    var size = H5T.getSize(dtype);
                    var classID = H5T.getClass(dtype);

                    // Only the first dimension is used: this is a 1-D reader.
                    int count = checked((int) H5S.getSimpleExtentDims(space)[0]);
                    int byteCount = checked(count*size);

                    // Pick the .NET element type up front so unsupported data fails
                    // with a clear message before anything is read.
                    Type dataType = null;
                    switch (classID)
                    {
                        case H5T.H5TClass.STRING:
                            dataType = typeof(string);
                            break;

                        case H5T.H5TClass.FLOAT:
                            if (size == 4)
                                dataType = typeof(float);
                            else if (size == 8)
                                dataType = typeof(double);
                            break;

                        case H5T.H5TClass.INTEGER:
                            if (size == 2)
                                dataType = typeof(Int16);
                            else if (size == 4)
                                dataType = typeof(Int32);
                            else if (size == 8)
                                dataType = typeof(Int64);
                            break;
                    }

                    if (dataType == null)
                    {
                        throw new NotSupportedException(string.Format(
                            "Dataset '{0}' has unsupported type class {1} with element size {2}.",
                            dataSetName, classID, size));
                    }

                    // The character set is the same for every element, so resolve the
                    // decoder once instead of once per string.
                    Encoding enc = null;
                    if (dataType == typeof(string))
                    {
                        var cSet = H5T.get_cset(dtype);
                        switch (cSet)
                        {
                            case H5T.CharSet.ASCII:
                                enc = Encoding.ASCII;
                                break;
                            case H5T.CharSet.UTF8:
                                enc = Encoding.UTF8;
                                break;
                        }

                        if (enc == null)
                        {
                            throw new NotSupportedException(string.Format(
                                "Dataset '{0}' uses unsupported character set {1}.",
                                dataSetName, cSet));
                        }
                    }

                    // Read data into byte array
                    var dataArray = new Byte[byteCount];
                    H5D.read(dataset, dtype, new H5Array<Byte>(dataArray));

                    // Convert types
                    if (enc != null)
                    {
                        // Each string occupies a fixed slot of 'size' bytes; shorter
                        // values are padded with '\0', which is stripped here.
                        // NOTE(review): this assumes fixed-length strings - for
                        // variable-length strings 'size' would not be the text length; confirm.
                        var stringArray = new String[count];
                        for (int i = 0; i < count; i++)
                        {
                            stringArray[i] = enc.GetString(dataArray, i*size, size).TrimEnd('\0');
                        }

                        return stringArray;
                    }

                    // Numeric data: reinterpret the raw bytes as the target element type.
                    Array returnArray = Array.CreateInstance(dataType, count);
                    Buffer.BlockCopy(dataArray, 0, returnArray, 0, byteCount);
                    return returnArray;
                }
                finally
                {
                    H5T.close(dtype);
                }
            }
            finally
            {
                H5S.close(space);
            }
        }
        finally
        {
            H5D.close(dataset);
        }
    }

OTHER TIPS

Your start was exceptionally helpful! With it and some help from the HDF5 example code, I was able to come up with some generic extensions that would reduce your code to:

//read string
// Calls the generic Read1DArray<T> extension on the file handle with T = string,
// reading the dataset named "names".
string[] datasetValue = fileId.Read1DArray<string>("names");

The extensions look something like this (which is, or should be, exactly the same as from the referenced question.):

/// <summary>
/// Extension methods for reading whole 1-D and 2-D datasets from an open HDF5 file
/// into managed arrays, with special handling for fixed-length string datasets.
/// </summary>
public static class HdfExtensions
{
    // thank you https://stackoverflow.com/questions/4133377/splitting-a-string-number-every-nth-character-number
    /// <summary>
    /// Lazily splits <paramref name="s"/> into consecutive pieces of
    /// <paramref name="partLength"/> characters; the last piece may be shorter.
    /// </summary>
    /// <param name="s">String to split.</param>
    /// <param name="partLength">Maximum length of each piece; must be positive.</param>
    /// <returns>The pieces, in order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="partLength"/> is not positive.</exception>
    /// <remarks>
    /// This is an iterator, so the argument checks run when enumeration starts.
    /// </remarks>
    public static IEnumerable<String> SplitInParts(this String s, Int32 partLength)
    {
        if (s == null)
            throw new ArgumentNullException("s");
        if (partLength <= 0)
            throw new ArgumentException("Part length has to be positive.", "partLength");

        for (var i = 0; i < s.Length; i += partLength)
            yield return s.Substring(i, Math.Min(partLength, s.Length - i));
    }

    /// <summary>
    /// Reads a one-dimensional dataset into a <typeparamref name="T"/> array.
    /// </summary>
    /// <typeparam name="T">
    /// Element type. For <c>string</c> the dataset is read as raw bytes and cut into
    /// fixed-size slots; any other type is read directly through <c>H5Array&lt;T&gt;</c>.
    /// </typeparam>
    /// <param name="fileId">Handle of the HDF5 file that contains the dataset.</param>
    /// <param name="dataSetName">Name of the dataset to open.</param>
    /// <returns>The dataset contents; strings have trailing '\0' padding removed.</returns>
    public static T[] Read1DArray<T>(this H5FileId fileId, string dataSetName)
    {
        // Handles are closed in finally blocks; previously they were never released.
        var dataset = H5D.open(fileId, dataSetName);
        try
        {
            var space = H5D.getSpace(dataset);
            try
            {
                var dataType = H5D.getType(dataset);
                try
                {
                    var dims = H5S.getSimpleExtentDims(space);
                    if (typeof(T) == typeof(string))
                    {
                        // NOTE(review): assumes fixed-length strings, where the datatype
                        // size is the byte length of one slot - confirm for your files.
                        int stringLength = H5T.getSize(dataType);
                        int count = checked((int)dims[0]);
                        byte[] buffer = new byte[checked(count * stringLength)];
                        H5D.read(dataset, dataType, new H5Array<byte>(buffer));

                        string[] strings = new string[count];
                        for (int i = 0; i < count; i++)
                        {
                            strings[i] = DecodeFixedString(buffer, i, stringLength);
                        }

                        // T is string here, so this cast cannot fail.
                        return (T[])(object)strings;
                    }

                    T[] dataArray = new T[dims[0]];
                    var wrapArray = new H5Array<T>(dataArray);
                    H5D.read(dataset, dataType, wrapArray);
                    return dataArray;
                }
                finally
                {
                    H5T.close(dataType);
                }
            }
            finally
            {
                H5S.close(space);
            }
        }
        finally
        {
            H5D.close(dataset);
        }
    }

    /// <summary>
    /// Reads a two-dimensional dataset into a <typeparamref name="T"/>[,] array.
    /// </summary>
    /// <typeparam name="T">
    /// Element type. For <c>string</c> the dataset is read as raw bytes and cut into
    /// fixed-size slots; any other type is read directly through <c>H5Array&lt;T&gt;</c>.
    /// </typeparam>
    /// <param name="fileId">Handle of the HDF5 file that contains the dataset.</param>
    /// <param name="dataSetName">Name of the dataset to open.</param>
    /// <returns>The dataset contents; strings have trailing '\0' padding removed.</returns>
    public static T[,] Read2DArray<T>(this H5FileId fileId, string dataSetName)
    {
        var dataset = H5D.open(fileId, dataSetName);
        try
        {
            var space = H5D.getSpace(dataset);
            try
            {
                var dataType = H5D.getType(dataset);
                try
                {
                    var dims = H5S.getSimpleExtentDims(space);
                    if (typeof(T) == typeof(string))
                    {
                        // Same byte-buffer approach as Read1DArray; slots are taken in
                        // row-major order, i.e. element (r, c) is slot r * cols + c.
                        // NOTE(review): assumes fixed-length strings - confirm for your files.
                        int stringLength = H5T.getSize(dataType);
                        int rows = checked((int)dims[0]);
                        int cols = checked((int)dims[1]);
                        byte[] buffer = new byte[checked(rows * cols * stringLength)];
                        H5D.read(dataset, dataType, new H5Array<byte>(buffer));

                        string[,] grid = new string[rows, cols];
                        for (int r = 0; r < rows; r++)
                        {
                            for (int c = 0; c < cols; c++)
                            {
                                grid[r, c] = DecodeFixedString(buffer, r * cols + c, stringLength);
                            }
                        }

                        // T is string here, so this cast cannot fail.
                        return (T[,])(object)grid;
                    }

                    T[,] dataArray = new T[dims[0], dims[1]];
                    var wrapArray = new H5Array<T>(dataArray);
                    H5D.read(dataset, dataType, wrapArray);
                    return dataArray;
                }
                finally
                {
                    H5T.close(dataType);
                }
            }
            finally
            {
                H5S.close(space);
            }
        }
        finally
        {
            H5D.close(dataset);
        }
    }

    // Decodes slot number 'slot' of a buffer made of fixed-size ASCII slots and strips
    // the '\0' padding that fills the unused tail of the slot.
    private static string DecodeFixedString(byte[] buffer, int slot, int slotLength)
    {
        return System.Text.Encoding.ASCII.GetString(buffer, slot * slotLength, slotLength).TrimEnd('\0');
    }
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top