% Conference paper (ICCV 2019): SPGNet, semantic prediction guidance for scene parsing.
% The citation key is kept exactly as exported so existing citations keep resolving.
% NOTE(review): the given names "Liang Chieh" and "Wen Mei" are kept as exported;
% confirm against the published paper whether they should be hyphenated.
% NOTE(review): address holds a country as exported, not a publisher city; confirm or drop.
@inproceedings{dedaa08eae4f4cb7a41d01e1a11629ca,
  author    = {Cheng, Bowen and Chen, Liang Chieh and Wei, Yunchao and Zhu, Yukun and Huang, Zilong and Xiong, Jinjun and Huang, Thomas and Hwu, Wen Mei and Shi, Honghui},
  title     = {{SPGNet}: Semantic Prediction Guidance for Scene Parsing},
  booktitle = {Proceedings - 2019 International Conference on Computer Vision, {ICCV} 2019},
  series    = {Proceedings of the {IEEE} International Conference on Computer Vision},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2019},
  month     = oct,
  pages     = {5217--5227},
  doi       = {10.1109/ICCV.2019.00532},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2019 IEEE.; 17th IEEE/CVF International Conference on Computer Vision, ICCV 2019 ; Conference date: 27-10-2019 Through 02-11-2019},
  abstract  = {Multi-scale context module and single-stage encoder-decoder structure are commonly employed for semantic segmentation. The multi-scale context module refers to the operations to aggregate feature responses from a large spatial extent, while the single-stage encoder-decoder structure encodes the high-level semantic information in the encoder path and recovers the boundary information in the decoder path. In contrast, multi-stage encoder-decoder networks have been widely used in human pose estimation and show superior performance than their single-stage counterpart. However, few efforts have been attempted to bring this effective design to semantic segmentation. In this work, we propose a Semantic Prediction Guidance (SPG) module which learns to re-weight the local features through the guidance from pixel-wise semantic prediction. We find that by carefully re-weighting features across stages, a two-stage encoder-decoder network coupled with our proposed SPG module can significantly outperform its one-stage counterpart with similar parameters and computations. Finally, we report experimental results on the semantic segmentation benchmark Cityscapes, in which our SPGNet attains 81.1\% on the test set using only 'fine' annotations.},
}