% MM-Eureka paper, Transactions on Machine Learning Research, 2026 (OpenReview forum id 8y1ch6y24H).
% The model name is braced as one whole word so sentence-casing styles do not turn it into "MM-eureka".
@article{meng2026mmeureka,
  author  = {Meng, Fanqing and Du, Lingxiao and Liu, Zongkai and Zhou, Zhixiang and Lu, Quanfeng and Han, Tiancheng and Fu, Daocheng and Zhang, Kaipeng and Luo, Ping and Qiao, Yu and Zhang, Jiaheng and Shieh, Michael Qizhe and Zhang, Qiaosheng and Shao, Wenqi},
  title   = {{MM-Eureka}: Toward Stable Multimodal Reasoning via Rule-based Reinforcement Learning with Policy Drift Control},
  journal = {Transactions on Machine Learning Research},
  issn    = {2835-8856},
  year    = {2026},
  url     = {https://openreview.net/forum?id=8y1ch6y24H},
}