publications | Shuyang (Kevin) Sun

2024

RAG-Driver: Generalisable driving explanations with retrieval-augmented in-context learning in multi-modal large language model

Jianhao Yuan, Shuyang Sun, Daniel Omeiza, Bo Zhao, Paul Newman, Lars Kunze, and Matthew Gadd

RSS, 2024

SynArtifact: Classifying and Alleviating Artifacts in Synthetic Images via Vision-Language Model

Bin Cao, Jianhao Yuan, Yexin Liu, Jian Li, Shuyang Sun, Jing Liu, and Bo Zhao

arXiv preprint arXiv:2402.18068, 2024

kNN-CLIP: Retrieval Enables Training-Free Segmentation on Continually Expanding Large Vocabularies

Zhongrui Gui, Shuyang Sun, Runjia Li, Jianhao Yuan, Zhaochong An, Karsten Roth, Ameya Prabhu, and Philip Torr

arXiv preprint arXiv:2404.09447, 2024

         
CLIP as RNN: Segment Countless Visual Concepts without Training Endeavor
 Shuyang Sun*, Runjia Li*, Philip Torr, Xiuye Gu, and Siyang Li
 In CVPR, 2024 
 
 Bib PDF Code Website 
 @inproceedings{clip_as_rnn,
  author    = {Sun*, Shuyang and Li*, Runjia and Torr, Philip and Gu, Xiuye and Li, Siyang},
  title     = {{CLIP} as {RNN}: Segment Countless Visual Concepts without Training Endeavor},
  booktitle = {CVPR},
  year      = {2024},
  selected  = {true},
  highlight = {true},
  preview   = {clip_as_rnn.jpg},
  pdf       = {https://arxiv.org/pdf/2312.07661.pdf},
  website   = {https://torrvision.com/clip_as_rnn/},
  code      = {https://github.com/kevin-ssy/clip_as_rnn},
}

Real-Fake: Effective Training Data Synthesis Through Distribution Matching

Jianhao Yuan, Jie Zhang, Shuyang Sun, Philip Torr, and Bo Zhao

ICLR, 2024

Bib PDF Code Website

@inproceedings{yuan2023real,
  author    = {Yuan, Jianhao and Zhang, Jie and Sun, Shuyang and Torr, Philip and Zhao, Bo},
  title     = {{Real-Fake}: Effective Training Data Synthesis Through Distribution Matching},
  booktitle = {ICLR},
  year      = {2024},
  selected  = {true},
  preview   = {realfake.jpg},
  website   = {https://torrvision.com/realfake/},
  pdf       = {https://arxiv.org/abs/2310.10402},
  code      = {https://github.com/baai-dcai/training-data-synthesis},
}

Lumix: Improving mixup by better modelling label uncertainty

Shuyang Sun*, Jie-Neng Chen*, Ruifei He, Alan Yuille, Philip Torr, and Song Bai

ICASSP, 2024

Bib

@inproceedings{sun2022lumix,
  author     = {Sun*, Shuyang and Chen*, Jie-Neng and He, Ruifei and Yuille, Alan and Torr, Philip and Bai, Song},
  title      = {Lumix: Improving mixup by better modelling label uncertainty},
  booktitle  = {ICASSP},
  year       = {2024},
  thumbnails = {true},
}

2023

ReMaX: Relaxing for better training on efficient panoptic segmentation

Shuyang Sun, Weijun Wang, Qihang Yu, Andrew Howard, Philip Torr, and Liang-Chieh Chen

NeurIPS, 2023

Bib PDF Website

@inproceedings{sun2023remax,
  author    = {Sun, Shuyang and Wang, Weijun and Yu, Qihang and Howard, Andrew and Torr, Philip and Chen, Liang-Chieh},
  title     = {{ReMaX}: Relaxing for better training on efficient panoptic segmentation},
  booktitle = {NeurIPS},
  year      = {2023},
  selected  = {true},
  pdf       = {https://arxiv.org/pdf/2306.17319.pdf},
  website   = {https://nips.cc/virtual/2023/poster/71598},
  preview   = {remax.jpg},
}

OxfordTVG-HIC: Can Machine Make Humorous Captions from Images?

Runjia Li*, Shuyang Sun*, Mohamed Elhoseiny, and Philip Torr

In ICCV, 2023

Bib

@inproceedings{li2023oxfordtvg,
  author    = {Li*, Runjia and Sun*, Shuyang and Elhoseiny, Mohamed and Torr, Philip},
  title     = {{OxfordTVG-HIC}: Can Machine Make Humorous Captions from Images?},
  booktitle = {ICCV},
  year      = {2023},
  pages     = {20293--20303},
}

Is synthetic data from generative models ready for image recognition?

Ruifei He, Shuyang Sun, Xin Yu, Chuhui Xue, Wenqing Zhang, Philip Torr, Song Bai, and Xiaojuan Qi

ICLR, spotlight, 2023

Bib PDF Code

@inproceedings{he2022synthetic,
  author    = {He, Ruifei and Sun, Shuyang and Yu, Xin and Xue, Chuhui and Zhang, Wenqing and Torr, Philip and Bai, Song and Qi, Xiaojuan},
  title     = {Is synthetic data from generative models ready for image recognition?},
  booktitle = {ICLR},
  year      = {2023},
  note      = {Spotlight},
  selected  = {true},
  code      = {https://github.com/cvmi-lab/syntheticdata},
  pdf       = {https://arxiv.org/abs/2210.07574},
  preview   = {synthesis.png},
}

2022

Slot-vps: Object-centric representation learning for video panoptic segmentation

Yi Zhou, Hui Zhang, Hana Lee, Shuyang Sun, Pingjun Li, Yangguang Zhu, ByungIn Yoo, Xiaojuan Qi, and Jae-Joon Han

In CVPR, 2022

Bib

@inproceedings{zhou2022slot,
  author    = {Zhou, Yi and Zhang, Hui and Lee, Hana and Sun, Shuyang and Li, Pingjun and Zhu, Yangguang and Yoo, ByungIn and Qi, Xiaojuan and Han, Jae-Joon},
  title     = {Slot-vps: Object-centric representation learning for video panoptic segmentation},
  booktitle = {CVPR},
  year      = {2022},
  pages     = {3093--3103},
}

Knowledge distillation as efficient pre-training: Faster convergence, higher data-efficiency, and better transferability

Ruifei He, Shuyang Sun, Jihan Yang, Song Bai, and Xiaojuan Qi

In CVPR, 2022

Bib

@inproceedings{he2022knowledge,
  author    = {He, Ruifei and Sun, Shuyang and Yang, Jihan and Bai, Song and Qi, Xiaojuan},
  title     = {Knowledge distillation as efficient pre-training: Faster convergence, higher data-efficiency, and better transferability},
  booktitle = {CVPR},
  year      = {2022},
  pages     = {9161--9171},
}

Patch-based separable transformer for visual recognition

Shuyang Sun, Xiaoyu Yue, Hengshuang Zhao, Philip Torr, and Song Bai

T-PAMI, 2022

Bib

@article{sun2022patch,
  author    = {Sun, Shuyang and Yue, Xiaoyu and Zhao, Hengshuang and Torr, Philip and Bai, Song},
  title     = {Patch-based separable transformer for visual recognition},
  journal   = {T-PAMI},
  publisher = {IEEE},
  year      = {2022},
}

2021

TransMix: Attend to Mix for Vision Transformers

Shuyang Sun*, Jie-Neng Chen*, Ju He, Philip Torr, Alan Yuille, and Song Bai

CVPR, 2021

Bib PDF Code

@inproceedings{chen2021transmix,
  author    = {Sun*, Shuyang and Chen*, Jie-Neng and He, Ju and Torr, Philip and Yuille, Alan and Bai, Song},
  title     = {{TransMix}: Attend to Mix for Vision Transformers},
  booktitle = {CVPR},
  year      = {2021},
  selected  = {true},
  pdf       = {https://arxiv.org/pdf/2111.09833.pdf},
  code      = {https://github.com/beckschen/transmix},
  preview   = {transmix.jpg},
}

Visual Parser: Representing Part-whole Hierarchies with Transformers

Shuyang Sun, Xiaoyu Yue, Song Bai, and Philip Torr

arXiv preprint arXiv:2107.05790, 2021

Bib PDF Code

@article{sun2021visual,
  author        = {Sun, Shuyang and Yue, Xiaoyu and Bai, Song and Torr, Philip},
  title         = {Visual Parser: Representing Part-whole Hierarchies with Transformers},
  journal       = {arXiv preprint arXiv:2107.05790},
  year          = {2021},
  eprint        = {2107.05790},
  archiveprefix = {arXiv},
  selected      = {true},
  preview       = {vip.png},
  pdf           = {https://arxiv.org/abs/2107.05790},
  code          = {https://github.com/kevin-ssy/vip},
}

Vision transformer with progressive sampling

Xiaoyu Yue*, Shuyang Sun*, Zhanghui Kuang, Meng Wei, Philip Torr, Wayne Zhang, and Dahua Lin

In ICCV, 2021

Bib PDF Code

@inproceedings{yue2021vision,
  author    = {Yue*, Xiaoyu and Sun*, Shuyang and Kuang, Zhanghui and Wei, Meng and Torr, Philip and Zhang, Wayne and Lin, Dahua},
  title     = {Vision transformer with progressive sampling},
  booktitle = {ICCV},
  year      = {2021},
  pages     = {387--396},
  selected  = {true},
  preview   = {psvit.png},
  pdf       = {https://arxiv.org/abs/2108.01684},
  code      = {https://github.com/yuexy/ps-vit},
}

Aggregation with Feature Detection

Shuyang Sun, Xiaoyu Yue, Xiaojuan Qi, Wanli Ouyang, Victor Adrian Prisacariu, and Philip Torr

In ICCV, 2021

Bib

@inproceedings{sun2021aggregation,
  author    = {Sun, Shuyang and Yue, Xiaoyu and Qi, Xiaojuan and Ouyang, Wanli and Prisacariu, Victor Adrian and Torr, Philip},
  title     = {Aggregation with Feature Detection},
  booktitle = {ICCV},
  year      = {2021},
  pages     = {527--536},
}

2020

Exploring the hierarchy in relation labels for scene graph generation

Yi Zhou, Shuyang Sun, Chao Zhang, Yikang Li, and Wanli Ouyang

arXiv preprint arXiv:2009.05834, 2020

Bib

@article{zhou2020exploring,
  author        = {Zhou, Yi and Sun, Shuyang and Zhang, Chao and Li, Yikang and Ouyang, Wanli},
  title         = {Exploring the hierarchy in relation labels for scene graph generation},
  journal       = {arXiv preprint arXiv:2009.05834},
  year          = {2020},
  eprint        = {2009.05834},
  archiveprefix = {arXiv},
}

Learning to sample the most useful training patches from images

Shuyang Sun, Liang Chen, Gregory Slabaugh, and Philip Torr

arXiv preprint arXiv:2011.12097, 2020

Bib

@article{sun2020learning,
  author        = {Sun, Shuyang and Chen, Liang and Slabaugh, Gregory and Torr, Philip},
  title         = {Learning to sample the most useful training patches from images},
  journal       = {arXiv preprint arXiv:2011.12097},
  year          = {2020},
  eprint        = {2011.12097},
  archiveprefix = {arXiv},
}

2019

Hybrid task cascade for instance segmentation

Kai Chen, Jiangmiao Pang, Jiaqi Wang, Yu Xiong, Xiaoxiao Li, Shuyang Sun, Wansen Feng, Ziwei Liu, Jianping Shi, Wanli Ouyang, and others

In CVPR, 2019

Bib

@inproceedings{chen2019hybrid,
  author    = {Chen, Kai and Pang, Jiangmiao and Wang, Jiaqi and Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and Liu, Ziwei and Shi, Jianping and Ouyang, Wanli and others},
  title     = {Hybrid task cascade for instance segmentation},
  booktitle = {CVPR},
  year      = {2019},
  pages     = {4974--4983},
}

MMDetection: Open mmlab detection toolbox and benchmark

Kai Chen, Jiaqi Wang, Jiangmiao Pang, Yuhang Cao, Yu Xiong, Xiaoxiao Li, Shuyang Sun, Wansen Feng, Ziwei Liu, Jiarui Xu, and others

arXiv preprint arXiv:1906.07155, 2019

Bib

@article{chen2019mmdetection,
  author        = {Chen, Kai and Wang, Jiaqi and Pang, Jiangmiao and Cao, Yuhang and Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and Liu, Ziwei and Xu, Jiarui and others},
  title         = {{MMDetection}: Open mmlab detection toolbox and benchmark},
  journal       = {arXiv preprint arXiv:1906.07155},
  year          = {2019},
  eprint        = {1906.07155},
  archiveprefix = {arXiv},
}

Robust multi-modality multi-object tracking

Wenwei Zhang, Hui Zhou, Shuyang Sun, Zhe Wang, Jianping Shi, and Chen Change Loy

In ICCV, 2019

Bib

@inproceedings{zhang2019robust,
  author    = {Zhang, Wenwei and Zhou, Hui and Sun, Shuyang and Wang, Zhe and Shi, Jianping and Loy, Chen Change},
  title     = {Robust multi-modality multi-object tracking},
  booktitle = {ICCV},
  year      = {2019},
  pages     = {2365--2374},
}

2018

Fishnet: A versatile backbone for image, region, and pixel level prediction

Shuyang Sun, Jiangmiao Pang, Jianping Shi, Shuai Yi, and Wanli Ouyang

NeurIPS, 2018

Bib PDF Code

@inproceedings{sun2018fishnet,
  author    = {Sun, Shuyang and Pang, Jiangmiao and Shi, Jianping and Yi, Shuai and Ouyang, Wanli},
  title     = {Fishnet: A versatile backbone for image, region, and pixel level prediction},
  booktitle = {NeurIPS},
  volume    = {31},
  year      = {2018},
  selected  = {true},
  pdf       = {https://arxiv.org/pdf/1901.03495.pdf},
  code      = {https://github.com/kevin-ssy/fishnet},
  preview   = {fish.png},
}

Optical Flow Guided Feature: A Fast and Robust Motion Representation for Video Action Recognition

Shuyang Sun, Zhanghui Kuang, Lu Sheng, Wanli Ouyang, and Wei Zhang

In CVPR, 2018

Bib PDF Code

@inproceedings{sun2018optical,
  author    = {Sun, Shuyang and Kuang, Zhanghui and Sheng, Lu and Ouyang, Wanli and Zhang, Wei},
  title     = {Optical Flow Guided Feature: A Fast and Robust Motion Representation for Video Action Recognition},
  booktitle = {CVPR},
  year      = {2018},
  selected  = {true},
  pdf       = {https://arxiv.org/pdf/1711.11152.pdf},
  code      = {https://github.com/kevin-ssy/optical-flow-guided-feature},
  preview   = {off.jpg},
}

2017

Spindle net: Person re-identification with human body region guided feature decomposition and fusion

Haiyu Zhao, Maoqing Tian, Shuyang Sun, Jing Shao, Junjie Yan, Shuai Yi, Xiaogang Wang, and Xiaoou Tang

In CVPR, 2017

Bib

@inproceedings{zhao2017spindle,
  author    = {Zhao, Haiyu and Tian, Maoqing and Sun, Shuyang and Shao, Jing and Yan, Junjie and Yi, Shuai and Wang, Xiaogang and Tang, Xiaoou},
  title     = {Spindle net: Person re-identification with human body region guided feature decomposition and fusion},
  booktitle = {CVPR},
  year      = {2017},
  pages     = {1077--1085},
}