BibTeX
@comment{
  ewrl18_belousov: workshop paper by Belousov and Peters, EWRL 2018.
  The topic list is stored in the keywords field; the key field is reserved
  for sorting and label fallback and must not hold keywords.
  NOTE(review): crossref points to p11164, which is not defined in the
  visible part of this file. Confirm that the parent entry exists and, for
  classic BibTeX, that it appears after this entry.
}
@inproceedings{ewrl18_belousov,
  author    = {Belousov, B. and Peters, J.},
  title     = {Mean Squared Advantage Minimization as a Consequence of Entropic Policy Improvement Regularization},
  booktitle = {European Workshops on Reinforcement Learning ({EWRL})},
  year      = {2018},
  keywords  = {policy optimization, entropic proximal mappings, actor-critic algorithms},
  url       = {https://www.ias.informatik.tu-darmstadt.de/uploads/Team/BorisBelousov/ewrl18_belousov.pdf},
  crossref  = {p11164},
}