% Qwen-VLA arXiv preprint. The arXiv identifier is stored in eprint/archiveprefix
% (not in a journal field) so styles can format and link it themselves.
% primaryclass is intentionally omitted: the subject class was not recorded in
% the original entry -- add it once confirmed against the arXiv listing.
@misc{wang2026qwenvla,
  author        = {Wang, Qiuyue and Li, Mingsheng and Guan, Jian and Ye, Jinhui and Xie, Sicheng and Liu, Yitao and Chen, Junhao and Liang, Zhixuan and Zhang, Jie and Hu, Xintong and Huang, Xuhong and Lin, Pei and Lin, Junyang and Liu, Dayiheng and Bai, Shuai and Zhou, Jingren and Zhang, Jiazhao and Yuan, Haoqi and Zhou, Gengze and Yin, Hang and Wang, Ye and Huang, Yiyang and Lei, Zixing and Peng, Wujian and Chen, Delin and Zheng, Yingming and Fan, Jingyang and Zhuang, Xianwei and Zhou, Xin and Li, Haoyang and Chen, Anzhe and Zhang, Tong and Liu, Xuejing and Sun, Yuchong and Chen, Ruizhe and Li, Zhaohai and Lv, Chenxu and Yang, Zhibo and Yu, Tao and Chen, Xionghui},
  title         = {{Qwen-VLA}: Unifying Vision-Language-Action Modeling across Tasks, Environments, and Robot Embodiments},
  year          = {2026},
  eprint        = {2605.30280},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2605.30280},
}