Can Transformers Learn to Solve Problems Recursively?

Neural networks have in recent years shown promise for helping software
engineers write programs and even formally verify them. While semantic
information plays a crucial part in these processes, it remains unclear to what
degree popular neural architectures like transformers are capable of modeling
that information. This paper examines the behavior of neural networks learning
algorithms relevant to programs and formal verification proofs through the lens
of mechanistic interpretability, focusing in particular on structural
recursion. Structural recursion is at the heart of tasks on which symbolic
tools currently outperform neural models, like inferring semantic relations
between datatypes and emulating program behavior. We evaluate the ability of
transformer models to learn to emulate the behavior of structurally recursive
functions from input-output examples. Our evaluation includes empirical and
conceptual analyses of the limitations and capabilities of transformer models
in approximating these functions, as well as reconstructions of the "shortcut"
algorithms the model learns. By reconstructing these algorithms, we are able to
correctly predict 91 percent of failure cases for one of the approximated
functions. Our work provides a new foundation for understanding the behavior of
neural networks that fail to solve the very tasks they are trained for.
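
Since the abstract's claims hinge on what "structural recursion" means, the sketch below gives a minimal illustration (the Tree datatype, the sumLeaves function, and the example values are hypothetical choices for illustration, not taken from the paper). A structurally recursive function makes each recursive call on an immediate subterm of its input, which is what guarantees termination; the emulation task studied in the paper asks a transformer to reproduce the input-output behavior of functions of exactly this shape.

-- Hypothetical example, not from the paper: a structurally recursive
-- function over a simple inductive datatype. Each recursive call is made
-- on an immediate subterm (l or r), so evaluation always terminates.

data Tree = Leaf Int | Node Tree Tree

-- Sum every leaf value by recursing on the tree's structure.
sumLeaves :: Tree -> Int
sumLeaves (Leaf n)   = n
sumLeaves (Node l r) = sumLeaves l + sumLeaves r

main :: IO ()
main = print (sumLeaves (Node (Leaf 1) (Node (Leaf 2) (Leaf 3))))  -- prints 6
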
@misc{zhang2023transformers,
abstract = {Neural networks have in recent years shown promise for helping software
engineers write programs and even formally verify them. While semantic
information plays a crucial part in these processes, it remains unclear to what
degree popular neural architectures like transformers are capable of modeling
that information. This paper examines the behavior of neural networks learning
algorithms relevant to programs and formal verification proofs through the lens
of mechanistic interpretability, focusing in particular on structural
recursion. Structural recursion is at the heart of tasks on which symbolic
tools currently outperform neural models, like inferring semantic relations
between datatypes and emulating program behavior. We evaluate the ability of
transformer models to learn to emulate the behavior of structurally recursive
functions from input-output examples. Our evaluation includes empirical and
conceptual analyses of the limitations and capabilities of transformer models
in approximating these functions, as well as reconstructions of the ``shortcut''
algorithms the model learns. By reconstructing these algorithms, we are able to
correctly predict 91 percent of failure cases for one of the approximated
functions. Our work provides a new foundation for understanding the behavior of
neural networks that fail to solve the very tasks they are trained for.},
author = {Zhang, Shizhuo Dylan and Tigges, Curt and Biderman, Stella and Raginsky, Maxim and Ringer, Talia},
biburl = {https://www.bibsonomy.org/bibtex/2383453582ea6a53841d99cf52582cdc4/tabularii},
description = {Can Transformers Learn to Solve Problems Recursively?},
keywords = {ai computation llm programming recursion self-promotion state-space-models transformers},
note = {cite arxiv:2305.14699},
title = {Can Transformers Learn to Solve Problems Recursively?},
url = {http://arxiv.org/abs/2305.14699},
year = 2023
}