@inproceedings{dyer-etal-2016-practical,
title = "Practical Neural Networks for {NLP}: From Theory to Code",
author = "Dyer, Chris and
Goldberg, Yoav and
Neubig, Graham",
editor = "Yang, Bishan and
Hwa, Rebecca",
booktitle = "Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing: Tutorial Abstracts",
month = nov,
year = "2016",
address = "Austin, Texas",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D16-2001",
    abstract = "This tutorial aims to bring NLP researchers up to speed with the current techniques in deep learning and neural networks, and show them how they can turn their ideas into practical implementations. We will start with simple classification models (logistic regression and multilayer perceptrons) and cover more advanced patterns that come up in NLP such as recurrent networks for sequence tagging and prediction problems, structured networks (e.g., compositional architectures based on syntax trees), structured output spaces (sequences and trees), attention for sequence-to-sequence transduction, and feature induction for complex algorithm states. A particular emphasis will be on learning to represent complex objects as recursive compositions of simpler objects. This representation will characterize standard objects in NLP, such as the composition of characters and morphemes into words, and words into sentences and documents. In addition, new opportunities such as learning to embed ``algorithm states'' such as those used in transition-based parsing and other sequential structured prediction models (for which effective features may be difficult to engineer by hand) will be covered. Everything in the tutorial will be grounded in code {---} we will show how to program seemingly complex neural-net models using toolkits based on the computation-graph formalism. Computation graphs decompose complex computations into a DAG, with nodes representing inputs, target outputs, parameters, or (sub)differentiable functions (e.g., ``tanh'', ``matrix multiply'', and ``softmax''), and edges representing data dependencies. These graphs can be run ``forward'' to make predictions and compute errors (e.g., log loss, squared error) and then ``backward'' to compute derivatives with respect to model parameters. In particular we'll cover the Python bindings of the CNN library. CNN has been designed from the ground up for NLP applications, dynamically structured NNs, rapid prototyping, and a transparent data and execution model.",
}
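A minimal sketch of the computation-graph workflow the abstract describes (build a fresh graph per example, run it forward to get a loss, run it backward for gradients), written against the Python bindings of the CNN library, later renamed DyNet. The dimensions, the two-layer classifier, and the training data here are illustrative assumptions, not the tutorial's actual example.

```python
# Sketch only: a tiny multilayer perceptron classifier in the
# computation-graph style (CNN / DyNet Python bindings).
import dynet as dy

DIM, HIDDEN, CLASSES = 8, 32, 2  # illustrative sizes, not from the tutorial

model = dy.ParameterCollection()            # container for trainable parameters
W1 = model.add_parameters((HIDDEN, DIM))    # input -> hidden weights
b1 = model.add_parameters((HIDDEN,))
W2 = model.add_parameters((CLASSES, HIDDEN))
b2 = model.add_parameters((CLASSES,))
trainer = dy.SimpleSGDTrainer(model)

def train_step(features, label):
    dy.renew_cg()                           # start a new computation graph (DAG)
    x = dy.inputVector(features)            # input node
    h = dy.tanh(W1 * x + b1)                # "matrix multiply" and "tanh" nodes
    scores = W2 * h + b2
    loss = dy.pickneglogsoftmax(scores, label)  # log-loss node
    loss_value = loss.value()               # run "forward": predictions and error
    loss.backward()                         # run "backward": gradients w.r.t. parameters
    trainer.update()                        # SGD step
    return loss_value

# Example call with made-up data:
# train_step([0.1] * DIM, 1)
```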