% Sa2VA -- arXiv preprint, 2025.
% Mixed-case model names are braced so sentence-casing styles keep their capitalisation.
@article{sa2va,
  author  = {Yuan, Haobo and Li, Xiangtai and Zhang, Tao and Huang, Zilong and Xu, Shilin and Ji, Shunping and Tong, Yunhai and Qi, Lu and Feng, Jiashi and Yang, Ming-Hsuan},
  title   = {{Sa2VA}: Marrying {SAM2} with {LLaVA} for Dense Grounded Understanding of Images and Videos},
  journal = {arXiv preprint},
  year    = {2025},
}
% Point Cloud Mamba -- AAAI 2025.
% Title typo "Could" corrected to "Cloud" (the subtitle already reads "Point Cloud Learning").
@inproceedings{zhang2025point,
  author    = {Zhang, Tao and Li, Xiangtai and Yuan, Haobo and Ji, Shunping and Yan, Shuicheng},
  title     = {Point Cloud {Mamba}: Point Cloud Learning via State Space Model},
  booktitle = {AAAI},
  address   = {Philadelphia, PA, USA},
  year      = {2025},
}
% RAP-SAM -- ICLR 2025.
% Space restored after the title colon; the acronym is braced to survive sentence-casing.
@inproceedings{xu2025rapsam,
  author    = {Xu, Shilin and Yuan, Haobo and Shi, Qingyu and Qi, Lu and Wang, Jingbo and Yang, Yibo and Li, Yining and Chen, Kai and Tong, Yunhai and Ghanem, Bernard and Li, Xiangtai and Yang, Ming-Hsuan},
  title     = {{RAP-SAM}: Towards Real-Time All-Purpose Segment Anything},
  booktitle = {ICLR},
  address   = {Singapore},
  year      = {2025},
}
% Open-Vocabulary SAM -- ECCV 2024.
% The acronym SAM is braced so sentence-casing styles do not lowercase it.
@inproceedings{yuan2024ovsam,
  author    = {Yuan, Haobo and Li, Xiangtai and Zhou, Chong and Li, Yining and Chen, Kai and Loy, Chen Change},
  title     = {Open-Vocabulary {SAM}: Segment and Recognize Twenty-thousand Classes Interactively},
  booktitle = {ECCV},
  address   = {Milano, Italy},
  year      = {2024},
}
% OMG-Seg -- CVPR 2024.
% The model name is braced so sentence-casing styles keep its capitalisation.
@inproceedings{li2024omg,
  author    = {Li, Xiangtai and Yuan, Haobo and Li, Wei and Ding, Henghui and Wu, Size and Zhang, Wenwei and Li, Yining and Chen, Kai and Loy, Chen Change},
  title     = {{OMG-Seg}: Is One Model Good Enough For All Segmentation?},
  booktitle = {CVPR},
  address   = {Seattle, WA, USA},
  year      = {2024},
}
% OMG-LLaVA -- NeurIPS 2024.
% The model name is braced so sentence-casing styles keep its capitalisation.
@inproceedings{zhang2024omgllava,
  author    = {Zhang, Tao and Li, Xiangtai and Fei, Hao and Yuan, Haobo and Wu, Shengqiong and Ji, Shunping and Loy, Chen Change and Yan, Shuicheng},
  title     = {{OMG-LLaVA}: Bridging Image-level, Object-level, Pixel-level Reasoning and Understanding},
  booktitle = {NeurIPS},
  address   = {Vancouver, Canada},
  year      = {2024},
}
% Survey of transformer-based visual segmentation -- journal article, 2024.
@article{li2024transformer,
  author  = {Li, Xiangtai and Ding, Henghui and Yuan, Haobo and Zhang, Wenwei and Pang, Jiangmiao and Cheng, Guangliang and Chen, Kai and Liu, Ziwei and Loy, Chen Change},
  title   = {Transformer-based Visual Segmentation: A Survey},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2024},
}
% Survey of open-vocabulary learning -- journal article, 2024.
@article{wu2024towards,
  author  = {Wu, Jianzong and Li, Xiangtai and Xu, Shilin and Yuan, Haobo and Ding, Henghui and Yang, Yibo and Li, Xia and Zhang, Jiangning and Tong, Yunhai and Jiang, Xudong and Ghanem, Bernard and Tao, Dacheng},
  title   = {Towards Open Vocabulary Learning: A Survey},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2024},
}
% PanopticPartFormer++ -- journal article, 2024.
% The citation key is kept as-is so existing citations still resolve.
% The camelCase model name is braced so sentence-casing styles keep its capitalisation.
@article{li2023panopticpartformer++,
  author  = {Li, Xiangtai and Xu, Shilin and Yang, Yibo and Yuan, Haobo and Cheng, Guangliang and Tong, Yunhai and Lin, Zhouchen and Yang, Ming-Hsuan and Tao, Dacheng},
  title   = {{PanopticPartFormer++}: A Unified and Decoupled View for Panoptic Part Segmentation},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2024},
}
@comment{
arXiv
Mamba or RWKV: Exploring High-Quality and High-Efficiency Segment Anything Model
}
% Mamba or RWKV -- arXiv preprint, 2024.
% Architecture names are braced so sentence-casing styles keep their capitalisation.
@article{yuan2024mamba,
  author  = {Yuan, Haobo and Li, Xiangtai and Qi, Lu and Zhang, Tao and Yang, Ming-Hsuan and Yan, Shuicheng and Loy, Chen Change},
  title   = {{Mamba} or {RWKV}: Exploring High-Quality and High-Efficiency Segment Anything Model},
  journal = {arXiv preprint},
  year    = {2024},
}
@comment{
arXiv
LLAVADI: What Matters For Multimodal Large Language Models Distillation
}
% LLAVADI -- arXiv preprint, 2024.
% The citation key is kept as-is (mixed case) so existing citations still resolve.
% The acronym is braced so sentence-casing styles do not lowercase it.
@article{xu2024LLAVADI,
  author  = {Xu, Shilin and Li, Xiangtai and Yuan, Haobo and Qi, Lu and Tong, Yunhai and Yang, Ming-Hsuan},
  title   = {{LLAVADI}: What Matters For Multimodal Large Language Models Distillation},
  journal = {arXiv preprint},
  year    = {2024},
}
% Tube-Link -- ICCV 2023.
% The model name is braced so sentence-casing styles keep its capitalisation.
@inproceedings{li2023tube,
  author    = {Li, Xiangtai and Yuan, Haobo and Zhang, Wenwei and Cheng, Guangliang and Pang, Jiangmiao and Loy, Chen Change},
  title     = {{Tube-Link}: A Flexible Cross Tube Baseline for Universal Video Segmentation},
  booktitle = {ICCV},
  address   = {Paris, France},
  year      = {2023},
}
% Monocular road planar parallax estimation -- journal article, 2023.
% The page range uses "--" so it is typeset as an en dash rather than a hyphen.
@article{yuan2023monocular,
  author  = {Yuan, Haobo and Chen, Teng and Sui, Wei and Xie, Jiafeng and Zhang, Lefei and Li, Yuan and Zhang, Qian},
  title   = {Monocular Road Planar Parallax Estimation},
  journal = {IEEE Transactions on Image Processing},
  volume  = {32},
  pages   = {3690--3701},
  year    = {2023},
}
% Neural-collapse-inspired few-shot class-incremental learning -- ICLR 2023.
@inproceedings{yang2023neural,
  author    = {Yang, Yibo and Yuan, Haobo and Li, Xiangtai and Lin, Zhouchen and Torr, Philip and Tao, Dacheng},
  title     = {Neural Collapse Inspired Feature-Classifier Alignment for Few-Shot Class-Incremental Learning},
  booktitle = {ICLR},
  address   = {Kigali, Rwanda},
  year      = {2023},
}
% Multi-task dense prediction with a multi-query transformer -- journal article, 2023.
@article{xu2023multi,
  author  = {Xu, Yangyang and Li, Xiangtai and Yuan, Haobo and Yang, Yibo and Zhang, Lefei},
  title   = {Multi-Task Learning with Multi-query Transformer for Dense Prediction},
  journal = {IEEE Transactions on Circuits and Systems for Video Technology},
  year    = {2023},
}
@comment{
arXiv
Neural Collapse Terminus: A Unified Solution for Class Incremental Learning and Its Variants
}
% Neural Collapse Terminus -- arXiv preprint, 2023.
% Journal string uses the spelling "arXiv preprint", matching other preprint entries in this file.
@article{yang2023nct,
  author  = {Yang, Yibo and Yuan, Haobo and Li, Xiangtai and Wu, Jianlong and Zhang, Lefei and Lin, Zhouchen and Torr, Philip and Tao, Dacheng and Ghanem, Bernard},
  title   = {Neural Collapse Terminus: A Unified Solution for Class Incremental Learning and Its Variants},
  journal = {arXiv preprint},
  year    = {2023},
}
% PolyphonicFormer -- ECCV 2022.
% The camelCase model name is braced so sentence-casing styles keep its capitalisation.
@inproceedings{yuan2022polyphonicformer,
  author    = {Yuan, Haobo and Li, Xiangtai and Yang, Yibo and Cheng, Guangliang and Zhang, Jing and Tong, Yunhai and Zhang, Lefei and Tao, Dacheng},
  title     = {{PolyphonicFormer}: Unified Query Learning for Depth-aware Video Panoptic Segmentation},
  booktitle = {ECCV},
  address   = {Tel Aviv, Israel},
  year      = {2022},
}
% Theoretically inspired neural initialization optimization -- NeurIPS 2022.
@inproceedings{yang2022towards,
  author    = {Yang, Yibo and Wang, Hong and Yuan, Haobo and Lin, Zhouchen},
  title     = {Towards Theoretically Inspired Neural Initialization Optimization},
  booktitle = {NeurIPS},
  address   = {New Orleans, LA, USA},
  year      = {2022},
}
% BOSSA -- journal article, volume 32, number 4, 2021.
% The leading acronym is braced; otherwise sentence-casing styles render it as "Bossa".
@article{chen2020bossa,
  author    = {Chen, Dian and Yuan, Haobo and Hu, Shengshan and Wang, Qian and Wang, Cong},
  title     = {{BOSSA}: a decentralized system for proofs of data retrievability and replication},
  journal   = {IEEE Transactions on Parallel and Distributed Systems},
  volume    = {32},
  number    = {4},
  pages     = {786--798},
  publisher = {IEEE},
  year      = {2021},
}