In applications such as data warehousing or data exchange, the ability to efficiently generate and query provenance information is crucial for understanding the origin of data. In this chapter, we review some of the main contributions of Perm, a DBMS that generates different types of provenance information for complex SQL queries (including nested and correlated subqueries and aggregation). The two key ideas behind Perm are representing data and its provenance together in a single relation and relying on query rewrites to generate this representation. Through this, Perm supports fully integrated, on-demand provenance generation and querying using SQL. Since Perm rewrites a query requesting provenance into a regular SQL query and generates easily optimizable SQL code, its performance greatly benefits from the query optimization techniques provided by the underlying DBMS.
@comment{GM13: chapter in an edited Festschrift volume, hence incollection with booktitle rather than article with journal. Citation key and custom site fields are unchanged.}
@incollection{GM13,
  author        = {Glavic, Boris and Miller, Ren{\'e}e J. and Alonso, Gustavo},
  title         = {Using {SQL} for Efficient Generation and Querying of Provenance Information},
  booktitle     = {{In search of elegance in the theory and practice of computation: a Festschrift in honour of Peter Buneman}},
  publisher     = {Springer},
  pages         = {291--320},
  year          = {2013},
  keywords      = {Perm; Provenance},
  projects      = {Perm},
  venueshort    = {Festschrift Peter Buneman},
  pdfurl        = {http://www.cs.uic.edu/%7ebglavic/dbgroup/assets/pdfpubls/GM13.pdf},
  date-added    = {2013-07-09 22:54:03 +0000},
  date-modified = {2013-08-22 22:56:39 +0000},
  bdsk-url-1    = {http://www.cs.uic.edu/%7ebglavic/dbgroup/assets/pdfpubls/GM13.pdf},
}