We present a method that achieves state-of-the-art results for synthesizing
novel views of complex scenes by optimizing an underlying continuous volumetric
scene function using a sparse set of input views. Our algorithm represents a
scene using a fully-connected (non-convolutional) deep network, whose input is
a single continuous 5D coordinate (spatial location $(x,y,z)$ and viewing
direction $(\theta, \phi)$) and whose output is the volume density and
view-dependent emitted radiance at that spatial location. We synthesize views
by querying 5D coordinates along camera rays and use classic volume rendering
techniques to project the output colors and densities into an image. Because
volume rendering is naturally differentiable, the only input required to
optimize our representation is a set of images with known camera poses. We
describe how to effectively optimize neural radiance fields to render
photorealistic novel views of scenes with complicated geometry and appearance,
and demonstrate results that outperform prior work on neural rendering and view
synthesis. View synthesis results are best viewed as videos, so we urge readers
to view our supplementary video for convincing comparisons.
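
As a concrete illustration of the rendering step described above, the sketch below implements the classic emission-absorption quadrature used to composite per-sample densities and colors along a camera ray into a single pixel color: each sample contributes with weight $T_i (1 - \exp(-\sigma_i \delta_i))$, where $T_i$ is the transmittance accumulated in front of it. This is a minimal NumPy sketch, not the authors' released implementation; the function and variable names (composite_ray, densities, colors, deltas) are illustrative.

import numpy as np

def composite_ray(densities, colors, deltas):
    """Alpha-composite N samples along one camera ray into an RGB color.

    densities: (N,)   volume density sigma_i predicted at each sample
    colors:    (N, 3) view-dependent emitted radiance c_i at each sample
    deltas:    (N,)   distance between adjacent samples along the ray
    """
    # Opacity of each ray segment: alpha_i = 1 - exp(-sigma_i * delta_i)
    alphas = 1.0 - np.exp(-densities * deltas)
    # Transmittance in front of sample i: T_i = exp(-sum_{j<i} sigma_j * delta_j)
    trans = np.exp(-np.concatenate([[0.0], np.cumsum(densities * deltas)[:-1]]))
    weights = trans * alphas                        # per-sample contribution
    return (weights[:, None] * colors).sum(axis=0)  # expected color of the ray

# Toy usage: 64 random samples along one ray.
rng = np.random.default_rng(0)
pixel = composite_ray(rng.uniform(0.0, 5.0, 64),
                      rng.uniform(0.0, 1.0, (64, 3)),
                      np.full(64, 0.05))

Because every operation here is differentiable in the densities and colors, gradients from a photometric loss on rendered pixels can flow back into the network that predicts them, which is what allows the representation to be optimized from a set of posed images alone.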
@misc{mildenhall2020representing,
author = {Mildenhall, Ben and Srinivasan, Pratul P. and Tancik, Matthew and Barron, Jonathan T. and Ramamoorthi, Ravi and Ng, Ren},
keywords = {deeplearning eccv20 neural_rendering view_synthesis},
note = {arXiv:2003.08934. ECCV 2020 (oral). Project page with videos and code: http://tancik.com/nerf},
title = {NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis},
url = {http://arxiv.org/abs/2003.08934},
year = 2020
}