% Srihari & Rapaport, workshop paper in the proceedings of the 1st Annual
% SNePS Workshop (held 1989, published by Springer in 1990).
% The citation key is an exporter-generated identifier; it is kept unchanged
% so that existing \cite commands continue to resolve.
@inproceedings{5be51a5ab8a540f197f17ac9d1be345f,
  author    = {Srihari, Rohini K. and Rapaport, William J.},
  title     = {Combining linguistic and pictorial information: Using captions to interpret newspaper photographs},
  editor    = {Kumar, Deepak},
  booktitle = {Current Trends in {SNePS} - Semantic Network Processing System - 1st Annual {SNePS} Workshop, Proceedings},
  series    = {Lecture Notes in Computer Science},
  pages     = {85--96},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  year      = {1990},
  doi       = {10.1007/BFb0022085},
  isbn      = {9783540526261},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} Springer-Verlag Berlin Heidelberg 1990.; 1st Annual Semantic Network Processing System Workshop, SNePS 1989 ; Conference date: 13-11-1989 Through 13-11-1989},
  abstract  = {There are many situations where linguistic and pictorial data are jointly presented to communicate information. A computer model for synthesising information from the two sources requires an initial interpretation of both the text and the picture followed by consolidation of information. The problem of performing general-purpose vision (without apriori knowledge) would make this a nearly impossible task. However, in some situations, the text describes salient aspects of the picture. In such situations, it is possible to extract visual information from the text, resulting in a relational graph describing the structure of the accompanying picture. This graph can then be used by a computer vision system to guide the interpretation of the picture. This paper discusses an application whereby information obtained from parsing a caption of a newspaper photograph is used to identify human faces in the photograph. Heuristics are described for extracting information from the caption which contributes to the hypothesised structure of the picture. The top-down processing of the image using this information is discussed.},
}