Mapping result rows to namedtuple in python sqlite

Question 1

There is a much easier way! Sqlite3 provides a way for the user to define "row factories". These row factories take the cursor and the tuple row and can return whatever type of object it wants.

Once you set the row factory with

con.row_factory = my_row_factory

then rows returned by the cursor will be the result of my_row_factory applied to the tuple-row. For example,

import sqlite3
import collections

LanguageRecord = collections.namedtuple('LanguageRecord', 'id name creation_date')
def namedtuple_factory(cursor, row):
    return LanguageRecord(*row)

con = sqlite3.connect(":memory:")
con.row_factory = namedtuple_factory
cur = con.cursor()
cur.execute("select 1,2,3")
print(cur.fetchone())

yields

LanguageRecord(id=1, name=2, creation_date=3)

For another example of how to define a namedtuple factory, see this post.

By the way, if you set

conn.row_factory = sqlite3.Row

then rows are returned as dicts, whose keys are the table's column names. Thus, instead of accessing parts of the namedtuple with things like row.creation_date you could just use the builtin sqlite3.Row row factory and access the equivalent with row['creation_date'].

Question 2

An improved row_factory is actually this, which can be reused for all sorts of queries:

from collections import namedtuple

def namedtuple_factory(cursor, row):
    """Returns sqlite rows as named tuples."""
    fields = [col[0] for col in cursor.description]
    Row = namedtuple("Row", fields)
    return Row(*row)

conn = sqlite3.connect(":memory:")
conn.row_factory = namedtuple_factory
cur = con.cursor()

Question 3

There is another one row_factory on the top of namedtuple:

from collection import namedtuple

def namedtuple_factory(cursor, row, cls=[None]):
    rf = cls[0]
    if rf is None:
        fields = [col[0] for col in cursor.description]
        cls[0] = namedtuple("Row", fields)
        return cls[0](*row)
    return rf(*row)

One can generalize further in order to use other class factories:

def make_row_factory(cls_factory, **kw):
    def row_factory(cursor, row, cls=[None]):
        rf = cls[0]
        if rf is None:
            fields = [col[0] for col in cursor.description]
            cls[0] = cls_factory("Row", fields, **kw)
            return cls[0](*row)
        return rf(*row)
    return row_factory

These factory functions are useful for cases when all query results have same fields.

Examples:

namedtuple_factory = make_row_factory(namedtuple)
import dataclass

row_factory = make_row_factory(dataclass.make_dataclass)
pip3 install recordclass

import recordclass

row_factory = make_row_factory(recordclass.make_dataclass, fast_new=True)

Here are some performance counters to compare different ways (debian linux, 64 bit, python 3.9).

Script for creation test database:

N = 1000000
conn = sqlite3.connect('example.db')
c = conn.cursor()
c.execute('''CREATE TABLE test
             (id int, x real, y real, p int, q int)''')
gen = ((i, random(), random(), randint(0,N), randint(0,N)) for i in range(N))
c.executemany("INSERT INTO test VALUES (?,?,?,?,?)", gen)
conn.commit()
conn.close()

Default:

conn = sqlite3.connect('example.db')
c = conn.cursor()
%time res = [row for row in c.execute("SELECT id,x,y,p,q FROM test")]
conn.close()
print(N * sys.getsizeof(res[0]) // 1000000, 'Mb')

CPU times: user 971 ms, sys: 92.1 ms, total: 1.06 s
Wall time: 1.06 s
80 Mb

sqlite3.Row:

conn = sqlite3.connect('example.db')
conn.row_factory = sqlite3.Row
c = conn.cursor()
%time res = [row for row in c.execute("SELECT id,x,y,p,q FROM test")]
conn.close()
# print(N * sys.getsizeof(res[0]) // 1000000, 'Mb')

CPU times: user 1.11 s, sys: 80.1 ms, total: 1.19 s
Wall time: 1.19 s

namedtuple:

from collections import namedtuple Row = namedtuple("Row", "id x y p q") conn = sqlite3.connect('example.db') c = conn.cursor() %time res = [Row(*row) for row in c.execute("SELECT id,x,y,p,q FROM test")] conn.close() print(N * sys.getsizeof(res[0]) // 1000000, 'Mb')

CPU times: user 1.89 s, sys: 71.8 ms, total: 1.96 s
Wall time: 1.96 s
80 Mb

namedtuple-based row factory:

conn = sqlite3.connect('example.db')
conn.row_factory = make_row_factory(namedtuple)
c = conn.cursor()
%time res = [row for row in c.execute("SELECT id,x,y,p,q FROM test")]
conn.close()
print(N * sys.getsizeof(res[0]) // 1000000, 'Mb')

CPU times: user 1.93 s, sys: 116 ms, total: 2.05 s
Wall time: 2.05 s
80 Mb

recordclass:

from recordclass import make_dataclass
Row = make_dataclass("Row", "id x y p q", fast_new=True)
conn = sqlite3.connect('example.db')
c = conn.cursor()
%time res = [Row(*row) for row in c.execute("SELECT id,x,y,p,q FROM test")]
conn.close()
print(N * sys.getsizeof(res[0]) // 1000000, 'Mb')

CPU times: user 1 s, sys: 72.2 ms, total: 1.08 s
Wall time: 1.07 s
56 Mb

recordclass-based row factory:

conn = sqlite3.connect('example.db')
conn.row_factory = make_row_factory(make_dataclass, fast_new=True)
c = conn.cursor()
%time res = [row for row in c.execute("SELECT id,x,y,p,q FROM test")]
conn.close()
print(N * sys.getsizeof(res[0]) // 1000000, 'Mb')

CPU times: user 1.11 s, sys: 76.2 ms, total: 1.19 s
Wall time: 1.19 s
56 Mb

Question 4

I demonstrate how to take a resulting dataframe from a sql query and convert it into a namedtuple list. I did not dynamic bind the dataframe columns to the nametuple names, not sure if this is possible.

LanguageRecord=namedtuple('Generic',['id','name','creation_date'])
def map_to_language_record(row):
    return LanguageRecord(row.id, row.name, row.creation_date)

df=pd.DataFrame({'id':[1,2,3],'name':['bob','dick','jane'],'creation_date': 
  ['1/1/2021','1/2/2021','1/3/2021']})

languages = list(map(map_to_language_record, df.itertuples()))
print(languages)

output:

[Generic(id=1, name='bob', creation_date='1/1/2021'), Generic(id=2, name='dick', creation_date='1/2/2021'), Generic(id=3, name='jane', creation_date='1/3/2021')]

Question 5

I think better to use for language in map(LanguageRecord._make, c.fetchall()[:1]): Because it can cause IndexError with fetchall()[0].

If you need one result and there is already "WHERE" in query. As I understand query should return one row. Early optimization is evil. :)