publications | Mingdong Wu

2025

IROS 2025

SimLauncher: Launching Sample-Efficient Real-world Robotic Reinforcement Learning via Simulation Pre-training

Mingdong Wu*, Lehong Wu*, Yizhuo Wu*, Weiyao Huang, Hongwei Fan, Zheyuan Hu, Haoran Geng, Jinzhou Li, Jiahe Ying, Long Yang, and others

IEEE/RSJ International Conference on Intelligent Robots and Systems, 2025

Bib PDF Video

@inproceedings{wu2025simlauncher,
  author    = {Wu*, Mingdong and Wu*, Lehong and Wu*, Yizhuo and Huang, Weiyao and Fan, Hongwei and Hu, Zheyuan and Geng, Haoran and Li, Jinzhou and Ying, Jiahe and Yang, Long and others},
  title     = {{SimLauncher}: Launching Sample-Efficient Real-world Robotic Reinforcement Learning via Simulation Pre-training},
  booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems},
  year      = {2025},
  video     = {https://www.dropbox.com/scl/fi/5yif7dl2bz4pcv2uw9s03/Training-Timelapse.mp4?rlkey=iga15684bqfy2fz9fwk6edsay&st=mryoerqt&dl=0}
}

CoRL 2025

UniTac2Pose: A Unified Approach Learned in Simulation for Generalizable Visuotactile In-hand Pose Estimation

Mingdong Wu*, Long Yan*, Jin Liu*, Weiyao Huang, Lehong Wu, Zelin Chen, Daolin Ma, and Hao Dong

Conference on Robot Learning, 2025

Bib PDF Code

@inproceedings{wu2025unitac2pose,
  author    = {Wu*, Mingdong and Yan*, Long and Liu*, Jin and Huang, Weiyao and Wu, Lehong and Chen, Zelin and Ma, Daolin and Dong, Hao},
  title     = {{UniTac2Pose}: A Unified Approach Learned in Simulation for Generalizable Visuotactile In-hand Pose Estimation},
  booktitle = {Conference on Robot Learning},
  year      = {2025}
}

IROS 2025

Adaptive Visuo-Tactile Fusion with Predictive Force Attention for Dexterous Manipulation

Jinzhou Li*, Tianhao Wu*, Jiyao Zhang, Zeyuan Chen, Haotian Jin, Mingdong Wu, Yujun Shen, Yaodong Yang, and Hao Dong

IEEE/RSJ International Conference on Intelligent Robots and Systems, 2025

Bib PDF

@inproceedings{li2025adaptive,
  author    = {Li*, Jinzhou and Wu*, Tianhao and Zhang, Jiyao and Chen, Zeyuan and Jin, Haotian and Wu, Mingdong and Shen, Yujun and Yang, Yaodong and Dong, Hao},
  title     = {Adaptive Visuo-Tactile Fusion with Predictive Force Attention for Dexterous Manipulation},
  booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems},
  year      = {2025}
}

ICRA 2025

Canonical representation and force-based pretraining of 3d tactile for dexterous visuo-tactile policy learning

Tianhao Wu, Jinzhou Li*, Jiyao Zhang*, Mingdong Wu, and Hao Dong

IEEE International Conference on Robotics and Automation, 2025

Bib PDF

@inproceedings{wu2024canonical,
  author    = {Wu, Tianhao and Li*, Jinzhou and Zhang*, Jiyao and Wu, Mingdong and Dong, Hao},
  title     = {Canonical representation and force-based pretraining of 3d tactile for dexterous visuo-tactile policy learning},
  booktitle = {IEEE International Conference on Robotics and Automation},
  year      = {2025}
}

ICLR 2025

Adamanip: Adaptive articulated object manipulation environments and policy learning

Yuanfei Wang, Xiaojie Zhang, Ruihai Wu, Yu Li, Yan Shen, Mingdong Wu, Zhaofeng He, Yizhou Wang, and Hao Dong

International Conference on Learning Representations, 2025

Bib PDF

@inproceedings{wang2025adamanip,
  author    = {Wang, Yuanfei and Zhang, Xiaojie and Wu, Ruihai and Li, Yu and Shen, Yan and Wu, Mingdong and He, Zhaofeng and Wang, Yizhou and Dong, Hao},
  title     = {Adamanip: Adaptive articulated object manipulation environments and policy learning},
  booktitle = {International Conference on Learning Representations},
  year      = {2025}
}

RSS 2025

Cordvip: Correspondence-based visuomotor policy for dexterous manipulation in real-world

Yankai Fu, Qiuxuan Feng, Ning Chen, Zichen Zhou, Mengzhen Liu, Mingdong Wu, Tianxing Chen, Shanyu Rong, Jiaming Liu, Hao Dong, and others

Robotics: Science and Systems, 2025

Bib PDF

@inproceedings{fu2025cordvip,
  author    = {Fu, Yankai and Feng, Qiuxuan and Chen, Ning and Zhou, Zichen and Liu, Mengzhen and Wu, Mingdong and Chen, Tianxing and Rong, Shanyu and Liu, Jiaming and Dong, Hao and others},
  title     = {Cordvip: Correspondence-based visuomotor policy for dexterous manipulation in real-world},
  booktitle = {Robotics: Science and Systems},
  year      = {2025}
}

ICCV 2025

GFPack++: Improving 2D Irregular Packing by Learning Gradient Field with Attention

Tianyang Xue, Lin Lv, Yang Liu, Mingdong Wu, Hao Dong, Yanbin Zhang, Renmin Han, and Baoquan Chen

International Conference on Computer Vision, 2025

Bib PDF

@inproceedings{xue2024gfpack++,
  author    = {Xue, Tianyang and Lv, Lin and Liu, Yang and Wu, Mingdong and Dong, Hao and Zhang, Yanbin and Han, Renmin and Chen, Baoquan},
  title     = {{GFPack++}: Improving {2D} Irregular Packing by Learning Gradient Field with Attention},
  booktitle = {International Conference on Computer Vision},
  year      = {2025},
  oral      = {Highlight}
}

2024

NeurIPS 2024

MO-DDN: A Coarse-to-Fine Attribute-based Exploration Agent for Multi-object Demand-driven Navigation

Hongcheng Wang, Peiqi Liu, Wenzhe Cai, Mingdong Wu, Zhengyu Qian, and Hao Dong

Thirty-eighth Conference on Neural Information Processing Systems, 2024

Bib PDF

@inproceedings{wang2024mo,
  author    = {Wang, Hongcheng and Liu, Peiqi and Cai, Wenzhe and Wu, Mingdong and Qian, Zhengyu and Dong, Hao},
  title     = {{MO-DDN}: A Coarse-to-Fine Attribute-based Exploration Agent for Multi-object Demand-driven Navigation},
  booktitle = {Thirty-eighth Conference on Neural Information Processing Systems},
  year      = {2024}
}

arXiv 2024

UniDexFPM: Universal Dexterous Functional Pre-grasp Manipulation Via Diffusion Policy

Tianhao Wu*, Yunchong Gan*, Mingdong Wu, Jingbo Cheng, Yaodong Yang, Yixin Zhu, and Hao Dong

Under Review, 2024

Bib PDF Website

@article{zhang2024unidexfpm,
  author  = {Wu*, Tianhao and Gan*, Yunchong and Wu, Mingdong and Cheng, Jingbo and Yang, Yaodong and Zhu, Yixin and Dong, Hao},
  title   = {{UniDexFPM}: Universal Dexterous Functional Pre-grasp Manipulation Via Diffusion Policy},
  journal = {Under Review},
  year    = {2024}
}

ECCV 2024

Omni6DPose: A Benchmark and Model for Universal 6D Object Pose Estimation and Tracking

Jiyao Zhang*, Weiyao Huang*, Bo Peng*, Mingdong Wu, Fei Hu, Zijian Chen, Bo Zhao, and Hao Dong

European Conference on Computer Vision, 2024

Bib PDF Website

@inproceedings{zhang2024omni6dpose,
  author    = {Zhang*, Jiyao and Huang*, Weiyao and Peng*, Bo and Wu, Mingdong and Hu, Fei and Chen, Zijian and Zhao, Bo and Dong, Hao},
  title     = {{Omni6DPose}: A Benchmark and Model for Universal {6D} Object Pose Estimation and Tracking},
  booktitle = {European Conference on Computer Vision},
  year      = {2024}
}

RAL 2024

Distilling Functional Rearrangement Priors from Large Models

Yiming Zeng*, Mingdong Wu*, Long Yang, Jiyao Zhang, Hao Ding, Hui Cheng, and Hao Dong

IEEE Robotics and Automation Letters, 2024

Bib PDF Website

@article{zeng2023distilling,
  author  = {Zeng*, Yiming and Wu*, Mingdong and Yang, Long and Zhang, Jiyao and Ding, Hao and Cheng, Hui and Dong, Hao},
  title   = {Distilling Functional Rearrangement Priors from Large Models},
  journal = {IEEE Robotics and Automation Letters},
  year    = {2024}
}

2023

NeurIPS 2023

GenPose: Generative Category-level Object Pose Estimation via Diffusion Models

Jiyao Zhang*, Mingdong Wu*, and Hao Dong

Thirty-seventh Conference on Neural Information Processing Systems, 2023

Abs Bib PDF Website Code

机器之心

We explore a pure generative approach to tackle the multi-hypothesis issue in 6D Category-level Object Pose Estimation. The key idea is to generate pose candidates using a score-based diffusion model and filter out outliers using an energy-based diffusion model. By aggregating the remaining candidates, we can obtain a robust and high-quality output pose.

@inproceedings{zhang2023genpose,
  author     = {Zhang*, Jiyao and Wu*, Mingdong and Dong, Hao},
  title      = {{GenPose}: Generative Category-level Object Pose Estimation via Diffusion Models},
  booktitle  = {Thirty-seventh Conference on Neural Information Processing Systems},
  year       = {2023},
  news       = {机器之心},
  news_link  = {https://mp.weixin.qq.com/s/RYV_aap9eYtwX_4_Ghr5Vw},
  sota_link  = {https://paperswithcode.com/sota/6d-pose-estimation-using-rgbd-on-real275?p=genpose-generative-category-level-object-pose},
  sota_badge = {https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/genpose-generative-category-level-object-pose/6d-pose-estimation-using-rgbd-on-real275},
  star       = {https://img.shields.io/github/stars/Jiyao06/GenPose?style=social&label=Code+Stars}
}

NeurIPS 2023

Learning Score-based Grasping Primitive for Human-assisting Dexterous Grasping

Tianhao Wu*, Mingdong Wu*, Jiyao Zhang, Yunchong Gan, and Hao Dong

Thirty-seventh Conference on Neural Information Processing Systems, 2023

Bib PDF Website Code

新智元

@inproceedings{wu2023learning,
  author    = {Wu*, Tianhao and Wu*, Mingdong and Zhang, Jiyao and Gan, Yunchong and Dong, Hao},
  title     = {Learning Score-based Grasping Primitive for Human-assisting Dexterous Grasping},
  booktitle = {Thirty-seventh Conference on Neural Information Processing Systems},
  year      = {2023},
  news      = {新智元},
  news_link = {https://mp.weixin.qq.com/s/hpzZWMizR8tPSGIvGVjPoA},
  star      = {https://img.shields.io/github/stars/tianhaowuhz/human-assisting-dex-grasp?style=social&label=Code+Stars}
}

NeurIPS 2023

Find What You Want: Learning Demand-conditioned Object Attribute Space for Demand-driven Navigation

Hongcheng Wang, Andy Guan Hong Chen, Xiaoqi Li, Mingdong Wu, and Hao Dong

Thirty-seventh Conference on Neural Information Processing Systems, 2023

Bib PDF Website Code

机器之心

@inproceedings{wang2023find,
  author    = {Wang, Hongcheng and Chen, Andy Guan Hong and Li, Xiaoqi and Wu, Mingdong and Dong, Hao},
  title     = {Find What You Want: Learning Demand-conditioned Object Attribute Space for Demand-driven Navigation},
  booktitle = {Thirty-seventh Conference on Neural Information Processing Systems},
  year      = {2023},
  news      = {机器之心},
  news_link = {https://mp.weixin.qq.com/s/Sj2q02VkY6HMzHDot6X9_w},
  star      = {https://img.shields.io/github/stars/whcpumpkin/Demand-driven-navigation?style=social&label=Code+Stars}
}

SIGGRAPH Asia
2023

Learning Gradient Fields for Scalable and Generalizable Irregular Packing

Tianyang Xue*, Mingdong Wu*, Lin Lu, Haoxuan Wang, Hao Dong, and Baoquan Chen

SIGGRAPH Asia, 2023

Bib PDF Website

@inproceedings{Xue2023learning,
  author    = {Xue*, Tianyang and Wu*, Mingdong and Lu, Lin and Wang, Haoxuan and Dong, Hao and Chen, Baoquan},
  title     = {Learning Gradient Fields for Scalable and Generalizable Irregular Packing},
  booktitle = {SIGGRAPH Asia},
  year      = {2023},
  abbryear  = {2023}
}

BMVC 2023
Oral

Score-PA: Score-based 3D Part Assembly

Junfeng Cheng, Mingdong Wu, Ruiyuan Zhang, Guanqi Zhan, Chao Wu, and Hao Dong

British Machine Vision Conference, 2023

Bib PDF Code

@inproceedings{cheng2023score,
  author    = {Cheng, Junfeng and Wu, Mingdong and Zhang, Ruiyuan and Zhan, Guanqi and Wu, Chao and Dong, Hao},
  title     = {{Score-PA}: Score-based {3D} Part Assembly},
  booktitle = {British Machine Vision Conference},
  year      = {2023},
  oral      = {Oral}
}

RAL 2023

Learning Semantic-Agnostic and Spatial-Aware Representation for Generalizable Visual-Audio Navigation

Hongcheng Wang, Yuxuan Wang, Fangwei Zhong, Mingdong Wu, Jianwei Zhang, Yizhou Wang, and Hao Dong

IEEE Robotics and Automation Letters, 2023

Bib PDF Website Code

@article{wang2023learning,
  author    = {Wang, Hongcheng and Wang, Yuxuan and Zhong, Fangwei and Wu, Mingdong and Zhang, Jianwei and Wang, Yizhou and Dong, Hao},
  title     = {Learning Semantic-Agnostic and Spatial-Aware Representation for Generalizable Visual-Audio Navigation},
  journal   = {IEEE Robotics and Automation Letters},
  publisher = {IEEE},
  year      = {2023}
}

CVPR 2023

GFPose: Learning 3d human pose prior with gradient fields

Hai Ci, Mingdong Wu, Wentao Zhu, Xiaoxuan Ma, Hao Dong, Fangwei Zhong, and Yizhou Wang

Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2023

Abs Bib PDF Website Code

GFPose is a unified 3D human pose prior model that can be easily used for various applications, e.g., 3D human pose estimation, pose denoising and generation. Our key idea is to estimate the gradient field (a.k.a. score) of the perturbed human pose. We can leverage the gradient to adjust poses to be more plausible and feasible to a task specification.

@inproceedings{ci2023gfpose,
  author     = {Ci, Hai and Wu, Mingdong and Zhu, Wentao and Ma, Xiaoxuan and Dong, Hao and Zhong, Fangwei and Wang, Yizhou},
  title      = {{GFPose}: Learning 3d human pose prior with gradient fields},
  booktitle  = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages      = {4800--4810},
  year       = {2023},
  sota_link  = {https://paperswithcode.com/sota/multi-hypotheses-3d-human-pose-estimation-on?p=gfpose-learning-3d-human-pose-prior-with},
  sota_badge = {https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/gfpose-learning-3d-human-pose-prior-with/multi-hypotheses-3d-human-pose-estimation-on},
  star       = {https://img.shields.io/github/stars/Embracing/GFPose?style=social&label=Code+Stars}
}

2022

NeurIPS 2022
TarGF: Learning Target Gradient Field to Rearrange Objects without Explicit Goal Specification

Mingdong Wu*, Fangwei Zhong*, Yulong Xia, and Hao Dong

Advances in Neural Information Processing Systems, 2022

Abs Bib PDF Website Code

We study object rearrangement without explicit goal specification. The agent is given examples from a target distribution and aims at rearranging objects to increase the likelihood of the distribution. Our key idea is to learn a target gradient field that indicates the fastest direction to increase the likelihood from examples via score-matching.
@inproceedings{wu2022targf,
  author    = {Wu*, Mingdong and Zhong*, Fangwei and Xia, Yulong and Dong, Hao},
  title     = {{TarGF}: Learning Target Gradient Field to Rearrange Objects without Explicit Goal Specification},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {Oh, Alice H. and Agarwal, Alekh and Belgrave, Danielle and Cho, Kyunghyun},
  year      = {2022},
  url       = {https://openreview.net/forum?id=Euv1nXN98P3}
}