We study the problem of finding liveness violations in real-world asynchronous and distributed systems. Unlike a safety property, which asserts that certain bad states should never occur during execution, a liveness property states that a program should not remain in a bad state for an infinitely long period of time. Checking for liveness violations is essential to ensure that a system will always make progress in production. The violation of a liveness property can be demonstrated by a finite execution where the same system state repeats twice (known as lasso). However, this requires the ability to capture the state precisely, which is arguably impossible in real-world systems. For this reason, previous approaches have instead relied on demonstrating a long execution where the system remains in a bad state. However, this hampers debugging because the produced trace can be very long, making it hard to understand. Our work aims to find liveness violations in real-world systems while still producing lassos as a bug witness. Our technique relies only on partially caching the system state, which is feasible to achieve efficiently in practice. To make up for imprecision in caching, we use retries: a potential lasso, where the same partial state repeats twice, is replayed multiple times to gain certainty that the execution is indeed stuck in a bad state. We have implemented our technique in the P# programming language and evaluated it on real production systems and several challenging academic benchmarks.
%0 Conference Paper
%1 Mudduluru:2017:LDU
%A Mudduluru, Rashmi
%A Deligiannis, Pantazis
%A Desai, Ankush
%A Lal, Akash
%A Qadeer, Shaz
%B Proceedings of the 17th Conference on Formal Methods in Computer-Aided Design
%C Austin, TX
%D 2017
%I FMCAD Inc
%K Detection Distribution Liveness MessagePassing Termination
%P 84--91
%T Lasso Detection Using Partial-state Caching
%U http://dl.acm.org/citation.cfm?id=3168451.3168473
%X We study the problem of finding liveness violations in real-world asynchronous and distributed systems. Unlike a safety property, which asserts that certain bad states should never occur during execution, a liveness property states that a program should not remain in a bad state for an infinitely long period of time. Checking for liveness violations is essential to ensure that a system will always make progress in production. The violation of a liveness property can be demonstrated by a finite execution where the same system state repeats twice (known as lasso). However, this requires the ability to capture the state precisely, which is arguably impossible in real-world systems. For this reason, previous approaches have instead relied on demonstrating a long execution where the system remains in a bad state. However, this hampers debugging because the produced trace can be very long, making it hard to understand. Our work aims to find liveness violations in real-world systems while still producing lassos as a bug witness. Our technique relies only on partially caching the system state, which is feasible to achieve efficiently in practice. To make up for imprecision in caching, we use retries: a potential lasso, where the same partial state repeats twice, is replayed multiple times to gain certainty that the execution is indeed stuck in a bad state. We have implemented our technique in the P# programming language and evaluated it on real production systems and several challenging academic benchmarks.
%@ 978-0-9835678-7-5
@inproceedings{Mudduluru:2017:LDU,
abstract = {We study the problem of finding liveness violations in real-world asynchronous and distributed systems. Unlike a safety property, which asserts that certain bad states should never occur during execution, a liveness property states that a program should not remain in a bad state for an infinitely long period of time. Checking for liveness violations is essential to ensure that a system will always make progress in production. The violation of a liveness property can be demonstrated by a finite execution where the same system state repeats twice (known as lasso). However, this requires the ability to capture the state precisely, which is arguably impossible in real-world systems. For this reason, previous approaches have instead relied on demonstrating a long execution where the system remains in a bad state. However, this hampers debugging because the produced trace can be very long, making it hard to understand. Our work aims to find liveness violations in real-world systems while still producing lassos as a bug witness. Our technique relies only on partially caching the system state, which is feasible to achieve efficiently in practice. To make up for imprecision in caching, we use retries: a potential lasso, where the same partial state repeats twice, is replayed multiple times to gain certainty that the execution is indeed stuck in a bad state. We have implemented our technique in the P# programming language and evaluated it on real production systems and several challenging academic benchmarks.},
acmid = {3168473},
added-at = {2019-08-06T15:39:34.000+0200},
address = {Austin, TX},
author = {Mudduluru, Rashmi and Deligiannis, Pantazis and Desai, Ankush and Lal, Akash and Qadeer, Shaz},
biburl = {https://www.bibsonomy.org/bibtex/2ecc6f9206bf8375c509e306a05a883fe/gron},
booktitle = {Proceedings of the 17th Conference on Formal Methods in Computer-Aided Design},
description = {Lasso detection using partial-state caching},
interhash = {f315a8fe2753835a9f05352fe84403ed},
intrahash = {ecc6f9206bf8375c509e306a05a883fe},
isbn = {978-0-9835678-7-5},
keywords = {Detection Distribution Liveness MessagePassing Termination},
location = {Vienna, Austria},
numpages = {8},
pages = {84--91},
publisher = {FMCAD Inc},
series = {FMCAD '17},
timestamp = {2019-08-06T15:39:34.000+0200},
title = {{Lasso Detection Using Partial-state Caching}},
url = {http://dl.acm.org/citation.cfm?id=3168451.3168473},
year = 2017
}