% Belousov and Peters, 2018 workshop paper (EWRL); PDF link in the url field.
% Topic tags are stored in `keywords`: `key` is BibTeX's sort/label fallback
% (biblatex treats it as `sortkey`), so tags placed there can distort sorting.
% NOTE(review): crossref target "p11164" is not visible in this chunk; classic
% BibTeX requires the parent entry to appear later in the file -- confirm it exists.
@inproceedings{ewrl18_belousov,
  author    = {Belousov, B. and Peters, J.},
  title     = {Mean squared advantage minimization as a consequence of entropic policy improvement regularization},
  booktitle = {European Workshops on Reinforcement Learning ({EWRL})},
  year      = {2018},
  keywords  = {policy optimization, entropic proximal mappings, actor-critic algorithms},
  url       = {https://www.ias.informatik.tu-darmstadt.de/uploads/Team/BorisBelousov/ewrl18_belousov.pdf},
  crossref  = {p11164},
}