@inproceedings{alizadeh-di-eugenio-2020-corpus,
title = "A Corpus for Visual Question Answering Annotated with Frame Semantic Information",
author = "Alizadeh, Mehrdad and
Di Eugenio, Barbara",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.678",
pages = "5524--5531",
abstract = "Visual Question Answering (VQA) has been widely explored as a computer vision problem, however enhancing VQA systems with linguistic information is necessary for tackling the complexity of the task. The language understanding part can play a major role especially for questions asking about events or actions expressed via verbs. We hypothesize that if the question focuses on events described by verbs, then the model should be aware of or trained with verb semantics, as expressed via semantic role labels, argument types, and/or frame elements. Unfortunately, no VQA dataset exists that includes verb semantic information. We created a new VQA dataset annotated with verb semantic information called imSituVQA. imSituVQA is built by taking advantage of the imSitu dataset annotations. The imSitu dataset consists of images manually labeled with semantic frame elements, mostly taken from FrameNet.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alizadeh-di-eugenio-2020-corpus">
<titleInfo>
<title>A Corpus for Visual Question Answering Annotated with Frame Semantic Information</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mehrdad</namePart>
<namePart type="family">Alizadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Di Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Visual Question Answering (VQA) has been widely explored as a computer vision problem; however, enhancing VQA systems with linguistic information is necessary for tackling the complexity of the task. The language understanding part can play a major role, especially for questions asking about events or actions expressed via verbs. We hypothesize that if the question focuses on events described by verbs, then the model should be aware of or trained with verb semantics, as expressed via semantic role labels, argument types, and/or frame elements. Unfortunately, no VQA dataset exists that includes verb semantic information. We created a new VQA dataset annotated with verb semantic information called imSituVQA. imSituVQA is built by taking advantage of the imSitu dataset annotations. The imSitu dataset consists of images manually labeled with semantic frame elements, mostly taken from FrameNet.</abstract>
<identifier type="citekey">alizadeh-di-eugenio-2020-corpus</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.678</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>5524</start>
<end>5531</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Corpus for Visual Question Answering Annotated with Frame Semantic Information
%A Alizadeh, Mehrdad
%A Di Eugenio, Barbara
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F alizadeh-di-eugenio-2020-corpus
%X Visual Question Answering (VQA) has been widely explored as a computer vision problem; however, enhancing VQA systems with linguistic information is necessary for tackling the complexity of the task. The language understanding part can play a major role, especially for questions asking about events or actions expressed via verbs. We hypothesize that if the question focuses on events described by verbs, then the model should be aware of or trained with verb semantics, as expressed via semantic role labels, argument types, and/or frame elements. Unfortunately, no VQA dataset exists that includes verb semantic information. We created a new VQA dataset annotated with verb semantic information called imSituVQA. imSituVQA is built by taking advantage of the imSitu dataset annotations. The imSitu dataset consists of images manually labeled with semantic frame elements, mostly taken from FrameNet.
%U https://aclanthology.org/2020.lrec-1.678
%P 5524-5531
Markdown (Informal)
[A Corpus for Visual Question Answering Annotated with Frame Semantic Information](https://aclanthology.org/2020.lrec-1.678) (Alizadeh & Di Eugenio, LREC 2020)
ACL
Mehrdad Alizadeh and Barbara Di Eugenio. 2020. A Corpus for Visual Question Answering Annotated with Frame Semantic Information. In Proceedings of the 12th Language Resources and Evaluation Conference, pages 5524–5531, Marseille, France. European Language Resources Association.
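
The abstract above notes that imSituVQA is built from imSitu images labeled with FrameNet-style frame elements. As a rough illustration only, the Python sketch below shows how question-answer pairs could in principle be derived from a frame-annotated image record via role-specific templates; the record fields, verb, roles, and templates are hypothetical and are not taken from the imSituVQA paper or the imSitu dataset.

# Illustrative only: a simplified, hypothetical imSitu-style record and a
# template-based way to derive question-answer pairs from its frame elements.
# Field names, the verb frame, and the question templates are assumptions made
# for illustration; they do not reproduce the paper's actual method or data.

from typing import Dict, List, Tuple

# Hypothetical frame annotation for one image: the verb plus its frame elements.
example_annotation: Dict[str, object] = {
    "image": "jumping_042.jpg",           # hypothetical file name
    "verb": "jumping",
    "frame_elements": {                    # semantic role -> filler (noun label)
        "agent": "dog",
        "source": "rock",
        "destination": "grass",
    },
}

# Hypothetical question templates, one per semantic role.
ROLE_TEMPLATES: Dict[str, str] = {
    "agent": "Who or what is {verb}?",
    "source": "Where is the {agent} {verb} from?",
    "destination": "Where is the {agent} {verb} to?",
}

def qa_pairs_from_annotation(ann: Dict[str, object]) -> List[Tuple[str, str]]:
    """Turn one frame-annotated image record into (question, answer) pairs."""
    verb = str(ann["verb"])
    elements = dict(ann["frame_elements"])  # role -> filler
    pairs: List[Tuple[str, str]] = []
    for role, filler in elements.items():
        template = ROLE_TEMPLATES.get(role)
        if template is None:
            continue  # skip roles without a template
        question = template.format(verb=verb, agent=elements.get("agent", "entity"))
        pairs.append((question, filler))
    return pairs

if __name__ == "__main__":
    for q, a in qa_pairs_from_annotation(example_annotation):
        print(f"Q: {q}\tA: {a}")

Running the sketch prints one question per annotated role (e.g. "Who or what is jumping?" answered by "dog"), which mirrors the general idea of pairing verb frame elements with images described in the abstract, without claiming to match the dataset's actual templates or schema.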