[Tutor] catching the row that raises IntegrityError in sqlite

Peter Otten __peter__ at web.de
Sun Feb 9 11:20:55 CET 2014


Sivaram Neelakantan wrote:

> 
> I've written this code that seems to work and I'd like to know how to get
> the record that causes the abort.  Apparently 'executemany' doesn't
> support lastrow?  And if it doesn't, any suggestions?
> 
> --8<---------------cut here---------------start------------->8---
> def table_load(datafile,name,conn,dbh):
>     print "processing table ",name
>     conn.execute("PRAGMA table_info("+ name +")")
>     #parse the resultset to get the col name
>     cols= [ x[1] for x in conn.fetchall()]
>     cv= ("?" * len(cols))
>     with open(datafile,'r') as fin:
>         dr = csv.reader(fin, delimiter='|')
>         to_db = [tuple(i) for i in dr]
>         print "Records read in: ",  len(to_db)
>     cl=','.join(cols)
>     cvv=','.join(cv)
>     try:
>         sql = "insert into %s (%s) values(%s)" %(name, cl, cvv)
>         conn.executemany(sql, to_db)
>         dbh.commit()
>     except sq.IntegrityError:
>         print('Record already exists') # but which record???
>         dbh.rollback()
>     finally:
>         sql= "select count(*) from %s;" %(name)
>         (row_cnt,) = conn.execute(sql).fetchone()
>         print "rows inserted ", row_cnt
> --8<---------------cut here---------------end--------------->8---
> 
> And do tell if I'm doing this try catch bits correctly please.

If nobody here comes up with a good way to find the offending record, you 
could ask on a mailing list or newsgroup dedicated to sqlite (please report 
back here if you do). If there is no "official" way, you might try the 
workaround shown below.

The idea here is to wrap the iterable of records to be inserted in the Iter 
class below, which keeps track of the last row handed to executemany().
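
For instance, after the wrapped iterator has been partially consumed, the most 
recently yielded row is available as .last (a quick interactive sketch using 
the Iter class defined below):

>>> records = Iter(["a", "b", "c"])
>>> gen = iter(records)
>>> next(gen), next(gen)
('a', 'b')
>>> records.last
'b'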

$ cat sqlite_integrity2.py
import sqlite3
import csv
import sys

class Iter(object):
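    # wraps an iterable and remembers the most recently yielded item in self.last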
    def __init__(self, items):
        self.items = items
    def __iter__(self):
        for item in self.items:
            self.last = item
            yield item

def table_load(datafile, name, cursor, db):
    print("processing table {}".format(name))
    cursor.execute("PRAGMA table_info("+ name +")")
    column_names = [descr[1] for descr in cursor.fetchall()]

    with open(datafile, 'r') as fin:
        records = csv.reader(fin, delimiter='|')
        records = Iter(records)
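        # "or rollback" tells sqlite to undo the whole transaction when a constraint is violated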
        sql = "insert or rollback into {name} ({columns}) 
values({qmarks})".format(
            name=name,
            columns=", ".join(column_names),
            qmarks=", ".join("?"*len(column_names)))
        try:
            cursor.executemany(sql, records)
        except sqlite3.IntegrityError as err:
            print("{}: {}".format(err, records.last))
        finally:
            sql= "select count(*) from {};".format(name)
            [row_count] = cursor.execute(sql).fetchone()
            print("rows inserted: {}".format(row_count))

if __name__ == "__main__":
    filename = sys.argv[1]
    db = sqlite3.connect(":memory:")
    cursor = db.cursor()
    cursor.execute("create table demo (name unique, value);")
    table_load(filename, "demo", cursor, db)
$ cat records_conflict.csv 
alpha|1
beta|2
gamma|3
alpha|4
delta|5
$ python sqlite_integrity2.py records_conflict.csv 
processing table demo
column name is not unique: ['alpha', '4']
rows inserted: 0
$ cat records_no_conflict.csv 
alpha|1
beta|2
gamma|3
delta|4
$ python sqlite_integrity2.py records_no_conflict.csv 
processing table demo
rows inserted: 4

While this approach seems to work at the moment, it will of course break 
should sqlite one day decide to read records ahead before performing the 
integrity check. Therefore I recommend the more conservative route of looping 
over the records explicitly:

for row in records:
    try:
        cursor.execute(sql, row)
    except sqlite3.IntegrityError as err:
        print("{}: {}".format(err, row))
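
Fleshed out, such a loop might look like the sketch below. table_load_rowwise 
is only a hypothetical variant of the table_load() above and reuses the same 
csv and sqlite3 imports:

def table_load_rowwise(datafile, name, cursor, db):
    # row-by-row variant of table_load(); slower than executemany(),
    # but it reports every conflicting record, not just the first one
    print("processing table {}".format(name))
    cursor.execute("PRAGMA table_info(" + name + ")")
    column_names = [descr[1] for descr in cursor.fetchall()]
    sql = "insert into {name} ({columns}) values({qmarks})".format(
        name=name,
        columns=", ".join(column_names),
        qmarks=", ".join("?" * len(column_names)))
    with open(datafile, "r") as fin:
        for row in csv.reader(fin, delimiter="|"):
            try:
                cursor.execute(sql, row)
            except sqlite3.IntegrityError as err:
                print("{}: {}".format(err, row))
    db.commit()

The non-conflicting rows are still inserted and committed; only the offending 
ones are skipped and reported.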



