BibTeX
% Workshop paper by Belousov and Peters, EWRL 2018.
% The topic list is stored in `keywords`; `key` is reserved by BibTeX as a
% sort/label fallback (and is an alias of `sortkey` in biblatex), so keeping
% the topics there would override author-based sorting.
% NOTE(review): the crossref target p11164 is not visible in this view. Classic
% BibTeX needs that parent entry to exist and to appear after this one; confirm.
@inproceedings{ewrl18_belousov,
  author    = {Belousov, B. and Peters, J.},
  title     = {Mean Squared Advantage Minimization as a Consequence of Entropic Policy Improvement Regularization},
  booktitle = {European Workshops on Reinforcement Learning ({EWRL})},
  year      = {2018},
  keywords  = {policy optimization, entropic proximal mappings, actor-critic algorithms},
  url       = {https://www.ias.informatik.tu-darmstadt.de/uploads/Team/BorisBelousov/ewrl18_belousov.pdf},
  crossref  = {p11164},
}