% Kennedy and Nath, "Jigsaw" -- conference paper in the SIGMOD/PODS 2011 proceedings.
% The citation key is the exporter-generated identifier; it is kept unchanged so that
% existing citations keep resolving. The system name and acronyms are brace-protected
% so that sentence-casing bibliography styles preserve their capitalisation.
@inproceedings{f4190ea2f85f49e59410ce61f85b049d,
  author    = {Kennedy, Oliver A. and Nath, Suman},
  title     = {{Jigsaw}: Efficient Optimization over Uncertain Enterprise Data},
  booktitle = {Proceedings of {SIGMOD} 2011 and {PODS} 2011},
  series    = {Proceedings of the {ACM} {SIGMOD} International Conference on Management of Data},
  pages     = {829--840},
  year      = {2011},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/1989323.1989410},
  isbn      = {9781450306614},
  language  = {English},
  keywords  = {black box, Monte Carlo, probabilistic database, simulation},
  note      = {2011 ACM SIGMOD and 30th PODS 2011 Conference; conference dates: 2011-06-12 through 2011-06-16},
  abstract  = {Probabilistic databases, in particular ones that allow users to externally define models or probability distributions -- so called VG-Functions -- are an ideal tool for constructing, simulating and analyzing hypothetical business scenarios. Enterprises often use such tools with parameterized models and need to explore a large parameter space in order to discover parameter values that optimize for a given goal. Parameter space is usually very large, making such exploration extremely expensive. We present Jigsaw, a probabilistic database-based simulation framework that addresses this performance problem. In Jigsaw, users define what-if style scenarios as parameterized probabilistic database queries and identify parameter values that achieve desired properties. Jigsaw uses a novel ``fingerprinting'' technique that efficiently identifies correlations between a query's output distribution for different parameter values. Using fingerprints, Jigsaw is able to reuse work performed for different parameter values, and obtain speedups of as much as 2 orders of magnitude for several real business scenarios.},
}