[Tutor] catching the row that raises IntegrityError in sqlite
Peter Otten
__peter__ at web.de
Sun Feb 9 11:20:55 CET 2014
Sivaram Neelakantan wrote:
>
> I've written this code that seems to work and I'd like to know how to get
> the record that causes the abort. Apparently 'executemany' doesn't
> support lastrow? And if it doesn't, any suggestions?
>
> --8<---------------cut here---------------start------------->8---
> def table_load(datafile,name,conn,dbh):
> print "processing table ",name
> conn.execute("PRAGMA table_info("+ name +")")
> #parse the resultset to get the col name
> cols= [ x[1] for x in conn.fetchall()]
> cv= ("?" * len(cols))
> with open(datafile,'r') as fin:
> dr = csv.reader(fin, delimiter='|')
> to_db = [tuple(i) for i in dr]
> print "Records read in: ", len(to_db)
> cl=','.join(cols)
> cvv=','.join(cv)
> try:
> sql = "insert into %s (%s) values(%s)" %(name, cl, cvv)
> conn.executemany(sql, to_db)
> dbh.commit()
> except sq.IntegrityError:
> print('Record already exists') # but which record???
> dbh.rollback()
> finally:
> sql= "select count(*) from %s;" %(name)
> (row_cnt,) = conn.execute(sql).fetchone()
> print "rows inserted ", row_cnt
> --8<---------------cut here---------------end--------------->8---
>
> And do tell if I'm doing this try catch bits correctly please.
If nobody here comes up with a good way to find the offending record you
could ask in a mailing list/newsgroup dedicated to sqlite (Please report
back here if you do). If there is no "official" way you might try the
workaround shown below.
The idea here is to wrap the iterable of records to be inserted in the Iter
class which keeps track of the last accessed row.
$ cat sqlite_integrity2.py
import sqlite3
import csv
import sys
class Iter(object):
def __init__(self, items):
self.items = items
def __iter__(self):
for item in self.items:
self.last = item
yield item
def table_load(datafile, name, cursor, db):
print("processing table {}".format(name))
cursor.execute("PRAGMA table_info("+ name +")")
column_names = [descr[1] for descr in cursor.fetchall()]
with open(datafile,'r') as fin:
records = csv.reader(fin, delimiter='|')
records = Iter(records)
sql = "insert or rollback into {name} ({columns})
values({qmarks})".format(
name=name,
columns=", ".join(column_names),
qmarks=", ".join("?"*len(column_names)))
try:
cursor.executemany(sql, records)
except sqlite3.IntegrityError as err:
print("{}: {}".format(err, records.last))
finally:
sql= "select count(*) from {};".format(name)
[row_count] = cursor.execute(sql).fetchone()
print("rows inserted: {}".format(row_count))
if __name__ == "__main__":
filename = sys.argv[1]
db = sqlite3.connect(":memory:")
cursor = db.cursor()
cursor.execute("create table demo (name unique, value);")
table_load(filename, "demo", cursor, db)
$ cat records_
records_conflict.csv records_no_conflict.csv
$ cat records_conflict.csv
alpha|1
beta|2
gamma|3
alpha|4
delta|5
$ python sqlite_integrity2.py records_conflict.csv
processing table demo
column name is not unique: ['alpha', '4']
rows inserted: 0
$ cat records_no_conflict.csv
alpha|1
beta|2
gamma|3
delta|4
$ python sqlite_integrity2.py records_no_conflict.csv
processing table demo
rows inserted: 4
While this approach seems to work at the moment it will of course break
should sqlite decide one day to read records ahead before performing the
integrity test. Therefore I recommend the more conservative road to loop
over the records explicitly:
for row in records:
try:
cursor.execute(sql, row)
except ...
...
More information about the Tutor
mailing list