The multi-armed bandit is an important framework for balancing exploration with exploitation in recommendation. Exploitation recommends content (e.g., products, movies, music playlists) with the highest predicted user engagement and has traditionally been the focus of recommender systems. Exploration recommends content with uncertain predicted user engagement for the purpose of gathering more information. The importance of exploration has been recognized in recent years, particularly in settings with new users, new items, non-stationary preferences and attributes. In parallel, explaining recommendations ("recsplanations") is crucial if users are to understand their recommendations. Existing work has looked at bandits and explanations independently. We provide the first method that combines both in a principled manner. In particular, our method is able to jointly (1) learn which explanations each user responds to; (2) learn the best content to recommend for each user; and (3) balance exploration with exploitation to deal with uncertainty. Experiments with historical log data and tests with live production traffic in a large-scale music recommendation service show a significant improvement in user engagement.
%0 Conference Paper
%1 McInerney:2018:EEE:3240323.3240354
%A McInerney, James
%A Lacker, Benjamin
%A Hansen, Samantha
%A Higley, Karl
%A Bouchard, Hugues
%A Gruson, Alois
%A Mehrotra, Rishabh
%B Proceedings of the 12th ACM Conference on Recommender Systems
%C New York, NY, USA
%D 2018
%I ACM
%K explanation recommender
%P 31--39
%R 10.1145/3240323.3240354
%T Explore, Exploit, and Explain: Personalizing Explainable Recommendations with Bandits
%U https://doi.org/10.1145/3240323.3240354
%X The multi-armed bandit is an important framework for balancing exploration with exploitation in recommendation. Exploitation recommends content (e.g., products, movies, music playlists) with the highest predicted user engagement and has traditionally been the focus of recommender systems. Exploration recommends content with uncertain predicted user engagement for the purpose of gathering more information. The importance of exploration has been recognized in recent years, particularly in settings with new users, new items, non-stationary preferences and attributes. In parallel, explaining recommendations ("recsplanations") is crucial if users are to understand their recommendations. Existing work has looked at bandits and explanations independently. We provide the first method that combines both in a principled manner. In particular, our method is able to jointly (1) learn which explanations each user responds to; (2) learn the best content to recommend for each user; and (3) balance exploration with exploitation to deal with uncertainty. Experiments with historical log data and tests with live production traffic in a large-scale music recommendation service show a significant improvement in user engagement.
%@ 978-1-4503-5901-6
@inproceedings{McInerney:2018:EEE:3240323.3240354,
  abstract    = {The multi-armed bandit is an important framework for balancing exploration with exploitation in recommendation. Exploitation recommends content (e.g., products, movies, music playlists) with the highest predicted user engagement and has traditionally been the focus of recommender systems. Exploration recommends content with uncertain predicted user engagement for the purpose of gathering more information. The importance of exploration has been recognized in recent years, particularly in settings with new users, new items, non-stationary preferences and attributes. In parallel, explaining recommendations (``recsplanations'') is crucial if users are to understand their recommendations. Existing work has looked at bandits and explanations independently. We provide the first method that combines both in a principled manner. In particular, our method is able to jointly (1) learn which explanations each user responds to; (2) learn the best content to recommend for each user; and (3) balance exploration with exploitation to deal with uncertainty. Experiments with historical log data and tests with live production traffic in a large-scale music recommendation service show a significant improvement in user engagement.},
  acmid       = {3240354},
  added-at    = {2018-12-25T19:06:32.000+0100},
  address     = {New York, NY, USA},
  author      = {McInerney, James and Lacker, Benjamin and Hansen, Samantha and Higley, Karl and Bouchard, Hugues and Gruson, Alois and Mehrotra, Rishabh},
  biburl      = {https://www.bibsonomy.org/bibtex/28398b26af8d86e48cb457e0931a8c1f7/brusilovsky},
  booktitle   = {Proceedings of the 12th {ACM} Conference on Recommender Systems},
  description = {Explore, exploit, and explain},
  doi         = {10.1145/3240323.3240354},
  interhash   = {392948ec01a6abb909426f16fe02b202},
  intrahash   = {8398b26af8d86e48cb457e0931a8c1f7},
  isbn        = {978-1-4503-5901-6},
  keywords    = {explanation recommender},
  location    = {Vancouver, British Columbia, Canada},
  numpages    = {9},
  pages       = {31--39},
  publisher   = {ACM},
  series      = {RecSys '18},
  timestamp   = {2019-06-09T09:31:52.000+0200},
  title       = {Explore, Exploit, and Explain: Personalizing Explainable Recommendations with Bandits},
  url         = {https://doi.org/10.1145/3240323.3240354},
  year        = {2018}
}