In a PDF417 barcode where the number of columns is fixed, how would I calculate the number of rows required for some text?

Question 1

You could look at the source-code of some PDF417 implementation, such as ZXing.

The text encoding isn't simply two characters per code-word. If you use any character other than uppercase letters and space, the encoder will add extra characters to switch character sets, and so on. You really have to encode the text to see how many code-words it will become.

/**
 * Demo that estimates how many rows a PDF417 barcode needs for a text message
 * when the number of columns is fixed.
 *
 * <p>The message is walked character by character through four sub-modes
 * (alpha, lower, mixed, punctuation). Each character, and each change of
 * sub-mode, costs a number of values; two values make up one code-word.
 */
public class Test {

    private static final int SUBMODE_ALPHA = 0;
    private static final int SUBMODE_LOWER = 1;
    private static final int SUBMODE_MIXED = 2;
    private static final int SUBMODE_PUNCTUATION = 3;

    /** Characters that belong to the mixed sub-mode. */
    private static final String MIXED_CHARS = "\t\r #$%&*+,-./0123456789:=^";

    /** Characters that belong to the punctuation sub-mode. */
    private static final String PUNCTUATION_CHARS = "\t\n\r!\"$'()*,-./:;<>?@[\\]_`{|}~";

    /**
     * Prints the code-word and row counts for a sample message laid out in seven columns.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String msg = "Hello, world!";
        final int columns = 7;
        final int dataWords = calculateSourceCodeWords(msg);
        final int eccWords = getErrorCorrectionCodewordCount(0);
        final int rows = calculateNumberOfRows(dataWords, eccWords, columns);
        System.out.printf(
                "\"%s\" requires %d code-words, and %d error correction code-words. This becomes %d rows.%n",
                msg, dataWords, eccWords, rows);
    }

    /**
     * Returns the number of rows needed to hold the given code-words.
     *
     * @param sourceCodeWords          number of data code-words
     * @param errorCorrectionCodeWords number of error correction code-words
     * @param columns                  number of columns per row
     * @return the row count; one extra code-word is always added to the total
     */
    public static int calculateNumberOfRows(int sourceCodeWords, int errorCorrectionCodeWords, int columns) {
        final int totalCodeWords = sourceCodeWords + 1 + errorCorrectionCodeWords;
        int rows = totalCodeWords / columns + 1;
        // When the total divides evenly, the "+ 1" above added one row too many.
        if (columns * rows >= totalCodeWords + columns) {
            rows -= 1;
        }
        return rows;
    }

    /**
     * Returns the number of error correction code-words for a correction level.
     *
     * @param errorCorrectionLevel level from 0 to 8 inclusive
     * @return 2 raised to the power {@code errorCorrectionLevel + 1}
     * @throws IllegalArgumentException if the level is outside 0..8
     */
    public static int getErrorCorrectionCodewordCount(int errorCorrectionLevel) {
        final boolean inRange = errorCorrectionLevel >= 0 && errorCorrectionLevel <= 8;
        if (!inRange) {
            throw new IllegalArgumentException("Error correction level must be between 0 and 8!");
        }
        return 2 << errorCorrectionLevel;
    }

    /**
     * Counts the code-words needed for a message.
     *
     * @param msg the text to measure
     * @return the number of values the message costs, divided by two and rounded up
     */
    public static int calculateSourceCodeWords(String msg) {
        final int end = msg.length();
        int values = 0;
        int mode = SUBMODE_ALPHA;
        int pos = 0;
        while (pos < end) {
            final char current = msg.charAt(pos);
            if (fitsSubmode(mode, current)) {
                // The character is directly available in the current sub-mode.
                values++;
                pos++;
                continue;
            }
            final boolean punctuationFollows = pos + 1 < end && isPunctuation(msg.charAt(pos + 1));
            final int target = switchTarget(mode, current, punctuationFollows);
            if (target == mode) {
                // No sub-mode change: the character costs two values and is consumed.
                values += 2;
                pos++;
            } else {
                // A sub-mode change costs one value; the same character is examined again.
                values++;
                mode = target;
            }
        }
        return (values + 1) / 2;
    }

    /** Tells whether {@code ch} can be counted as a single value in {@code submode}. */
    private static boolean fitsSubmode(int submode, char ch) {
        switch (submode) {
            case SUBMODE_ALPHA:
                return isAlphaUpper(ch);
            case SUBMODE_LOWER:
                return isAlphaLower(ch);
            case SUBMODE_MIXED:
                return isMixed(ch);
            default:
                return isPunctuation(ch);
        }
    }

    /**
     * Picks the sub-mode to switch to for a character that does not fit {@code submode}.
     * Returning {@code submode} itself means "do not switch".
     */
    private static int switchTarget(int submode, char ch, boolean punctuationFollows) {
        switch (submode) {
            case SUBMODE_ALPHA:
                if (isAlphaLower(ch)) {
                    return SUBMODE_LOWER;
                }
                return isMixed(ch) ? SUBMODE_MIXED : SUBMODE_ALPHA;
            case SUBMODE_LOWER:
                return isMixed(ch) ? SUBMODE_MIXED : SUBMODE_LOWER;
            case SUBMODE_MIXED:
                if (isAlphaUpper(ch)) {
                    return SUBMODE_ALPHA;
                }
                if (isAlphaLower(ch)) {
                    return SUBMODE_LOWER;
                }
                // Only switch to punctuation when the following character is punctuation.
                return punctuationFollows ? SUBMODE_PUNCTUATION : SUBMODE_MIXED;
            default:
                // Anything that does not fit the punctuation sub-mode falls back to alpha.
                return SUBMODE_ALPHA;
        }
    }

    private static boolean isAlphaUpper(char ch) {
        return ch == ' ' || ('A' <= ch && ch <= 'Z');
    }

    private static boolean isAlphaLower(char ch) {
        return ch == ' ' || ('a' <= ch && ch <= 'z');
    }

    private static boolean isMixed(char ch) {
        return MIXED_CHARS.indexOf(ch) >= 0;
    }

    private static boolean isPunctuation(char ch) {
        return PUNCTUATION_CHARS.indexOf(ch) >= 0;
    }
}

Output:

"Hello, world!" requires 9 code-words, and 2 error correction code-words. This becomes 2 rows.

Question 2

I've made a Python port of Markus Jarderot's answer. The calculation remains the same.

import string

# Character sets of the four sub-modes. calculateSourceCodeWords() keeps one of
# these strings as its current sub-mode and tests characters with `in`, so each
# string doubles as both the sub-mode identifier and its membership set.
# Space is part of the alpha, lower and mixed sets.
SUBMODE_ALPHA = string.ascii_uppercase + ' '
SUBMODE_LOWER = string.ascii_lowercase + ' '
SUBMODE_MIXED = "\t\r #$%&*+,-./0123456789:=^"
SUBMODE_PUNCTUATION = "\t\n\r!\"$'()*,-./:;<>?@[\\]_`{|}~"


def calculateNumberOfRows(sourceCodeWords, errorCorrectionCodeWords, columns):
    """Return the number of rows needed to hold the given code-words.

    sourceCodeWords: number of data code-words.
    errorCorrectionCodeWords: number of error correction code-words.
    columns: number of columns per row.

    One extra code-word is always added to the total. The result is an int.
    """
    # Floor division is required: on Python 3 `/` is true division, which
    # produced a fractional row count (and a wrong answer once truncated).
    rows = ((sourceCodeWords + 1 + errorCorrectionCodeWords) // columns) + 1
    # When the total divides evenly, the "+ 1" above added one row too many.
    if columns * rows >= sourceCodeWords + 1 + errorCorrectionCodeWords + columns:
        rows -= 1
    return rows

def getErrorCorrectionCodewordCount(errorCorrectionLevel):
    """Return the number of error correction code-words for a correction level.

    errorCorrectionLevel: level from 0 to 8 inclusive.

    Returns 2 ** (errorCorrectionLevel + 1).
    Raises ValueError if the level is outside 0..8.
    """
    # The original `0 > level > 8` chained to `0 > level and level > 8`,
    # which can never be true, so out-of-range levels were silently accepted.
    if not 0 <= errorCorrectionLevel <= 8:
        raise ValueError("Error correction level must be between 0 and 8!")
    return 1 << (errorCorrectionLevel + 1)


def calculateSourceCodeWords(msg):
    """Count the code-words needed for a message.

    The message is walked character by character, tracking the current
    sub-mode (one of the SUBMODE_* strings). A character in the current
    sub-mode costs one value. A character outside it either triggers a
    sub-mode change (one value, then the character is examined again) or
    costs two values with the sub-mode left unchanged.

    Returns the value count divided by two, rounded up, as an int.
    """
    length = 0
    submode = SUBMODE_ALPHA
    msgLength = len(msg)
    idx = 0
    while idx < msgLength:
        ch = msg[idx]
        # Every pass costs one value: the character itself or a sub-mode change.
        length += 1

        if ch not in submode:
            new_submode = submode
            if submode == SUBMODE_ALPHA:
                if ch in SUBMODE_LOWER:
                    new_submode = SUBMODE_LOWER
                elif ch in SUBMODE_MIXED:
                    new_submode = SUBMODE_MIXED

            elif submode == SUBMODE_LOWER:
                if ch in SUBMODE_MIXED:
                    new_submode = SUBMODE_MIXED

            elif submode == SUBMODE_MIXED:
                if ch in SUBMODE_ALPHA:
                    new_submode = SUBMODE_ALPHA
                elif ch in SUBMODE_LOWER:
                    new_submode = SUBMODE_LOWER
                # Only consider punctuation when the character is not a letter;
                # an unconditional check here overrode the alpha/lower switch
                # and over-counted inputs such as "1A.".
                elif idx + 1 < msgLength and msg[idx + 1] in SUBMODE_PUNCTUATION:
                    new_submode = SUBMODE_PUNCTUATION

            elif submode == SUBMODE_PUNCTUATION:
                new_submode = SUBMODE_ALPHA

            if new_submode != submode:
                # Sub-mode changed: re-examine the same character in the new sub-mode.
                submode = new_submode
                continue

            # No sub-mode change: the character costs a second value.
            length += 1

        idx += 1  # Don't increment if 'continue' was used.
    # Floor division keeps the result an int on Python 3.
    return (length + 1) // 2


def main():
    """Print the code-word and row counts for a sample message in seven columns."""
    text = "Hello, world!"
    column_count = 7
    data_words = calculateSourceCodeWords(text)
    ecc_words = getErrorCorrectionCodewordCount(0)
    row_count = calculateNumberOfRows(data_words, ecc_words, column_count)
    report = "\"%s\" requires %d code-words, and %d error correction code-words. This becomes %d rows.\n"
    print(report % (text, data_words, ecc_words, row_count))



# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()