We present the first query-based approach for explaining missing answers to queries over nested relational data, a common data format used by big data systems such as Apache Spark. Our main contributions are a novel way to define query-based why-not provenance based on repairs to queries, as well as an implementation and preliminary experiments for answering such queries in Spark.
@inproceedings{DG19a,
  author     = {Diestelk{\"a}mper, Ralf and Glavic, Boris and Herschel, Melanie and Lee, Seokki},
  title      = {Query-based Why-not Explanations for Nested Data},
  booktitle  = {Proceedings of the 11th {USENIX} Workshop on the Theory and Practice of Provenance},
  year       = {2019},
  keywords   = {Provenance; Missing Answers},
  isworkshop = {true},
  venueshort = {TaPP},
  pdfurl     = {http://www.cs.uic.edu/%7ebglavic/dbgroup/assets/pdfpubls/DG19.pdf},
}