We present an architecture and prototype implementation for a generic provenance database middleware (GProM) that is based on the concept of query rewrites, which are applied to an algebraic graph representation of database operations. The system supports a wide range of provenance types and representations for queries, updates, transactions, and operations spanning multiple transactions. GProM supports several strategies for provenance generation, e.g., on-demand, rule-based, and “always on”. To the best of our knowledge, we are the first to present a solution for computing the provenance of concurrent database transactions. Our solution can retroactively trace transaction provenance as long as an audit log and time travel functionality are available (both are supported by most DBMS). Other noteworthy features of GProM include: extensibility through a declarative rewrite rule specification language, support for multiple database backends, and an optimizer for rewritten queries.
% AG14 -- TaPP 2014 workshop paper (GProM project). Standard bibliographic fields come first; the remaining fields are non-standard metadata that standard styles ignore.
@inproceedings{AG14,
  author     = {Arab, Bahareh and Gawlick, Dieter and Radhakrishnan, Venkatesh and Guo, Hao and Glavic, Boris},
  title      = {A Generic Provenance Middleware for Database Queries, Updates, and Transactions},
  booktitle  = {Proceedings of the 6th USENIX Workshop on the Theory and Practice of Provenance},
  year       = {2014},
  venueshort = {TaPP},
  isworkshop = {true},
  keywords   = {Reenactment; Provenance; Concurrency Control; GProM},
  projects   = {GProM},
  pdfurl     = {http://www.cs.uic.edu/%7ebglavic/dbgroup/assets/pdfpubls/AG14.pdf},
  slideurl   = {http://www.slideshare.net/lordPretzel/tapp-2014-talk-boris},
  bdsk-url-1 = {http://www.cs.uic.edu/%7ebglavic/dbgroup/assets/pdfpubls/AG14.pdf},
}