thesis.bib

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Neural Machine Translation %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% arXiv id written as "arXiv preprint arXiv:<id>" like most arXiv entries in this file.
@article{systran16,
  author  = {Josep Maria Crego and Jungi Kim and Guillaume Klein and
             Anabel Rebollo and Kathy Yang and Jean Senellart and
             Egor Akhanov and Patrice Brunelle and Aurelien Coquard and
             Yongchao Deng and Satoshi Enoue and Chiyo Geiss and
             Joshua Johanson and Ardas Khalsa and Raoum Khiari and
             Byeongil Ko and Catherine Kobus and Jean Lorieux and
             Leidiana Martins and Dang{-}Chuan Nguyen and Alexandra Priori and
             Thomas Riccardi and Natalia Segal and Christophe Servan and
             Cyril Tiquet and Bo Wang and Jin Yang and Dakun Zhang and
             Jing Zhou and Peter Zoldan},
  title   = {{SYSTRAN}'s Pure Neural Machine Translation Systems},
  journal = {arXiv preprint arXiv:1610.05540},
  year    = {2016},
}

% arXiv id written as "arXiv preprint arXiv:<id>" like most arXiv entries in this file.
@article{gnmt16,
  author  = {Yonghui Wu and Mike Schuster and Zhifeng Chen and Quoc V. Le and
             Mohammad Norouzi and Wolfgang Macherey and Maxim Krikun and
             Yuan Cao and Qin Gao and Klaus Macherey and Jeff Klingner and
             Apurva Shah and Melvin Johnson and Xiaobing Liu and
             Lukasz Kaiser and Stephan Gouws and Yoshikiyo Kato and
             Taku Kudo and Hideto Kazawa and Keith Stevens and
             George Kurian and Nishant Patil and Wei Wang and Cliff Young and
             Jason Smith and Jason Riesa and Alex Rudnick and Oriol Vinyals and
             Greg Corrado and Macduff Hughes and Jeffrey Dean},
  title   = {{Google}'s Neural Machine Translation System: Bridging the Gap between
             Human and Machine Translation},
  journal = {arXiv preprint arXiv:1609.08144},
  year    = {2016},
}

@inproceedings{cohn16,
  author    = {Cohn, Trevor and Hoang, Cong Duy Vu and Vymolova, Ekaterina and
               Yao, Kaisheng and Dyer, Chris and Haffari, Gholamreza},
  booktitle = {NAACL},
  title     = {Incorporating Structural Alignment Biases into an Attentional Neural Translation Model},
  year      = {2016},
}

@inproceedings{tu16coverage,
  author    = {Tu, Zhaopeng and Lu, Zhengdong and Liu, Yang and Liu, Xiaohua and Li, Hang},
  booktitle = {ACL},
  title     = {Modeling Coverage for Neural Machine Translation},
  year      = {2016},
}

@inproceedings{kim16distill,
  author    = {Yoon Kim and Alexander M. Rush},
  booktitle = {EMNLP},
  title     = {Sequence-Level Knowledge Distillation},
  year      = {2016},
}

% Title carries no trailing period; the bibliography style supplies punctuation.
@inproceedings{cheng16,
  author    = {Cheng, Yong and Xu, Wei and He, Zhongjun and He, Wei and Wu, Hua and Sun, Maosong and Liu, Yang},
  title     = {Semi-Supervised Learning for Neural Machine Translation},
  booktitle = {ACL},
  year      = {2016},
}


% Full given names stored so the style decides on abbreviation.
@article{tu16,
  author  = {Tu, Zhaopeng and Liu, Yang and Shang, Lifeng and Liu, Xiaohua and Li, Hang},
  title   = {Neural Machine Translation with Reconstruction},
  journal = {arXiv preprint arXiv:1611.01874},
  year    = {2016},
}

@inproceedings{xia16,
  author    = {Yingce Xia and Di He and Tao Qin and Liwei Wang and
               Nenghai Yu and Tie{-}Yan Liu and Wei{-}Ying Ma},
  title     = {Dual Learning for Machine Translation},
  booktitle = {NIPS},
  year      = {2016},
}

% arXiv id written as "arXiv preprint arXiv:<id>" like most arXiv entries in this file.
@article{ha16,
  author  = {Thanh-Le Ha and Jan Niehues and Alexander Waibel},
  title   = {Toward Multilingual Neural Machine Translation with Universal Encoder and Decoder},
  journal = {arXiv preprint arXiv:1611.04798},
  year    = {2016},
}

% Accent written as a BibTeX special character ({\'e}) so sorting and labels treat it as one letter.
@article{gnmt16multi,
  author  = {Melvin Johnson and Mike Schuster and Quoc V. Le and Maxim Krikun and
             Yonghui Wu and Zhifeng Chen and Nikhil Thorat and Fernanda Vi{\'e}gas and
             Martin Wattenberg and Greg Corrado and Macduff Hughes and Jeffrey Dean},
  title   = {{Google}'s Multilingual Neural Machine Translation System: Enabling Zero-Shot Translation},
  journal = {arXiv preprint arXiv:1611.04558},
  year    = {2016},
}

@inproceedings{firat16,
  author    = {Firat, Orhan and Cho, Kyunghyun and Bengio, Yoshua},
  booktitle = {NAACL},
  title     = {Multi-Way, Multilingual Neural Machine Translation with a Shared Attention Mechanism},
  year      = {2016},
}

@inproceedings{zoph16,
  author    = {Zoph, Barret and Knight, Kevin},
  booktitle = {NAACL},
  title     = {Multi-Source Neural Translation},
  year      = {2016},
}

@inproceedings{see16,
  author    = {See, Abigail and Luong, Minh-Thang and Manning, Christopher D.},
  title     = {Compression of Neural Machine Translation Models via Pruning},
  booktitle = {CoNLL},
  year      = {2016},
}

@inproceedings{luong16,
  author    = {Luong, Minh-Thang and Manning, Christopher D.},
  title     = {Achieving Open Vocabulary Neural Machine Translation with Hybrid Word-Character Models},
  booktitle = {ACL},
  year      = {2016},
}

@inproceedings{sennrich16mono,
  author    = {Rico Sennrich and Barry Haddow and Alexandra Birch},
  title     = {Improving neural machine translation models with monolingual data},
  booktitle = {ACL},
  year      = {2016},
}


@inproceedings{sennrich16sub,
  author    = {Rico Sennrich and Barry Haddow and Alexandra Birch},
  title     = {Neural Machine Translation of Rare Words with Subword Units},
  booktitle = {ACL},
  year      = {2016},
}

@inproceedings{luong16iclr,
  author    = {Minh-Thang Luong and Quoc V. Le and Ilya Sutskever and
               Oriol Vinyals and Lukasz Kaiser},
  title     = {Multi-task sequence to sequence learning},
  booktitle = {ICLR},
  year      = {2016},
}

@article{ling15char,
  author  = {Wang Ling and Isabel Trancoso and Chris Dyer and Alan Black},
  title   = {Character-based Neural Machine Translation},
  journal = {arXiv preprint arXiv:1511.04586},
  year    = {2015},
}

% Acronym braced so sentence-casing styles keep "IWSLT" upper-case.
@inproceedings{iwslt15,
  author    = {Mauro Cettolo and Jan Niehues and Sebastian St{\"{u}}ker and
               Luisa Bentivogli and Roldano Cattoni and Marcello Federico},
  title     = {The {IWSLT} 2015 Evaluation Campaign},
  booktitle = {IWSLT},
  year      = {2015},
}

% Proper noun braced so sentence-casing styles keep "Stanford" capitalised.
@inproceedings{luong15iwslt,
  author    = {Luong, Minh-Thang and Manning, Christopher D.},
  title     = {{Stanford} Neural Machine Translation Systems for Spoken Language Domain},
  booktitle = {IWSLT},
  year      = {2015},
}

% Accent written as a BibTeX special character ({\'e}); proper noun braced in the title.
@inproceedings{jean15wmt,
  author    = {Jean, S{\'e}bastien and Firat, Orhan and Cho, Kyunghyun and Memisevic, Roland and Bengio, Yoshua},
  title     = {{Montreal} Neural Machine Translation Systems for {WMT}'15},
  booktitle = {WMT},
  year      = {2015},
}

@inproceedings{luong15attn,
  author    = {Luong, Minh-Thang and Pham, Hieu and Manning, Christopher D.},
  title     = {Effective Approaches to Attention-based Neural Machine Translation},
  booktitle = {EMNLP},
  year      = {2015},
}

@article{gulcehre2015using,
  author  = {Gulcehre, Caglar and Firat, Orhan and Xu, Kelvin and Cho, Kyunghyun and
             Barrault, Loic and Lin, Huei-Chi and Bougares, Fethi and
             Schwenk, Holger and Bengio, Yoshua},
  title   = {On Using Monolingual Corpora in Neural Machine Translation},
  journal = {arXiv preprint arXiv:1503.03535},
  year    = {2015},
}

@inproceedings{luong15,
  author    = {Minh-Thang Luong and Ilya Sutskever and Quoc V. Le and
               Oriol Vinyals and Wojciech Zaremba},
  title     = {Addressing the Rare Word Problem in Neural Machine Translation},
  booktitle = {ACL},
  year      = {2015},
}

% Accent written as a BibTeX special character ({\'e}) so sorting and labels treat it as one letter.
@inproceedings{jean15,
  author    = {S{\'e}bastien Jean and Kyunghyun Cho and Roland Memisevic and Yoshua Bengio},
  title     = {On Using Very Large Target Vocabulary for Neural Machine Translation},
  booktitle = {ACL},
  year      = {2015},
}

@inproceedings{bog15,
  author    = {Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
  title     = {Neural Machine Translation by Jointly Learning to Align and Translate},
  booktitle = {ICLR},
  year      = {2015},
}

@inproceedings{sutskever14,
  author    = {Ilya Sutskever and Oriol Vinyals and Quoc V. Le},
  title     = {Sequence to Sequence Learning with Neural Networks},
  booktitle = {NIPS},
  year      = {2014},
}

@inproceedings{cho14,
  author    = {Kyunghyun Cho and Bart van Merrienboer and Caglar Gulcehre and
               Fethi Bougares and Holger Schwenk and Yoshua Bengio},
  title     = {Learning Phrase Representations using {RNN} Encoder-Decoder for Statistical Machine Translation},
  booktitle = {EMNLP},
  year      = {2014},
}

@inproceedings{kal13,
  author    = {Nal Kalchbrenner and Phil Blunsom},
  title     = {Recurrent Continuous Translation Models},
  booktitle = {EMNLP},
  year      = {2013},
}

% Chapter in an edited Springer volume: the book title belongs in booktitle
% (with publisher), so the entry is an @incollection rather than an @article.
@incollection{forcada97,
  author    = {Mikel L. Forcada and Ram{\'o}n {\~N}eco},
  title     = {Recursive hetero-associative memories for translation},
  booktitle = {Biological and Artificial Computation: From Neuroscience to Technology},
  publisher = {Springer},
  pages     = {453--462},
  year      = {1997},
}

% Page range uses the double hyphen (en dash).
@article{chrisman91,
  author  = {Lonnie Chrisman},
  title   = {Learning Recursive Distributed Representations for Holistic Computation},
  journal = {Connection Science},
  volume  = {3},
  number  = {4},
  pages   = {345--366},
  year    = {1991},
}

@inproceedings{allen87,
  author    = {Robert B. Allen},
  booktitle = {ICNN},
  title     = {Several studies on natural language and back-propagation},
  year      = {1987},
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Machine Translation %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Author surname is Davis (arXiv:1608.01884); "Winograd" braced as a proper noun.
@article{winograd_mt16,
  author  = {Ernest Davis},
  title   = {{Winograd} Schemas and Machine Translation},
  journal = {arXiv preprint arXiv:1608.01884},
  year    = {2016},
}

@inproceedings{luong15nlm,
  author    = {Luong, Minh-Thang and Kayser, Michael and Manning, Christopher D.},
  booktitle = {CoNLL},
  title     = {Deep Neural Language Models for Machine Translation},
  year      = {2015},
}

% Only the case-sensitive words are braced instead of the whole title;
% the accent is written as a BibTeX special character ({\'c}).
@inproceedings{chrf,
  author    = {Popovi{\'c}, Maja},
  title     = {{chrF}: character n-gram {F-score} for automatic {MT} evaluation},
  booktitle = {WMT},
  year      = {2015},
}

% Winning entry in WMT'15 for English-Czech
@inproceedings{bojar15wmt,
  author    = {Bojar, Ond{\v{r}}ej and Tamchyna, Ale{\v{s}}},
  title     = {{CUNI} in {WMT15}: {Chimera} Strikes Again},
  booktitle = {WMT},
  year      = {2015},
}

@inproceedings{bojar15,
  author    = {Bojar, Ond\v{r}ej and Chatterjee, Rajen and Federmann, Christian and
               Haddow, Barry and Huck, Matthias and Hokamp, Chris and
               Koehn, Philipp and Logacheva, Varvara and Monz, Christof and
               Negri, Matteo and Post, Matt and Scarton, Carolina and
               Specia, Lucia and Turchi, Marco},
  booktitle = {WMT},
  title     = {Findings of the 2015 Workshop on Statistical Machine Translation},
  year      = {2015},
}


% Winning entry in WMT'14 for English-German
@inproceedings{buck14,
  author    = {Christian Buck and Kenneth Heafield and Bas van Ooyen},
  title     = {N-gram Counts and Language Models from the {Common Crawl}},
  booktitle = {LREC},
  year      = {2014},
}


% Winning entry in WMT'14 for English-French
@inproceedings{durrani-EtAl:2014:W14-33,
  author    = {Durrani, Nadir and Haddow, Barry and Koehn, Philipp and Heafield, Kenneth},
  title     = {{Edinburgh}'s Phrase-based Machine Translation Systems for {WMT}-14},
  booktitle = {WMT},
  year      = {2014},
}

@misc{wmt14_en_fr,
  author       = {Holger Schwenk},
  title        = {University Le Mans},
  howpublished = {\url{http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/}},
  year         = {2014},
  note         = {[Online; accessed 03-September-2014]},
}

% Host is statmt.org (the original URL read "startmt.org").
% The organisation name is double-braced so it is never split as a personal name.
@misc{statmt,
  author       = {{statmt.org}},
  title        = {statmt.org},
  howpublished = {\url{http://matrix.statmt.org/matrix/}},
  year         = {2014},
  note         = {[Online; accessed 03-September-2014]},
}

% ACL'14 best paper award, joint neural LM
@inproceedings{devlin14,
  author    = {Jacob Devlin and Rabih Zbib and Zhongqiang Huang and
               Thomas Lamar and Richard Schwartz and John Makhoul},
  title     = {Fast and Robust Neural Network Joint Models for Statistical Machine Translation},
  booktitle = {ACL},
  year      = {2014},
}

% @misc rather than the biblatex-only @online: classic BibTeX styles define no
% @online type, and howpublished = {\url{...}} is the @misc idiom used by the
% other web references in this file.
@misc{mobiledevices,
  author       = {Zachary Davies Boren},
  title        = {There are officially more mobile devices than people in the world},
  howpublished = {\url{http://www.independent.co.uk/life-style/gadgets-and-tech/news/there-are-officially-more-mobile-devices-than-people-in-the-world-9780518.html}},
  year         = {2014},
  note         = {Accessed: 2016-09-10},
}

% @misc rather than the biblatex-only @online (see the other web references in
% this file); the Unicode curly apostrophe is replaced by ASCII ' so that
% 8-bit BibTeX handles the title.
@misc{solvemt,
  author       = {Nataly Kelly},
  title        = {Why Machines Alone Cannot Solve the World's Translation Problem},
  howpublished = {\url{http://www.huffingtonpost.com/nataly-kelly/why-machines-alone-cannot-translation_b_4570018.html}},
  year         = {2014},
  note         = {Accessed: 2016-09-10},
}

% Title carries no trailing period; the bibliography style supplies punctuation.
@inproceedings{Auli13,
  author    = {Auli, Michael and Galley, Michel and Quirk, Chris and Zweig, Geoffrey},
  title     = {Joint Language and Translation Modeling with Recurrent Neural Networks},
  booktitle = {ACL},
  year      = {2013},
}
@inproceedings{green-EtAl:2013:ACL2013,
  author    = {Green, Spence and Wang, Sida and Cer, Daniel and Manning, Christopher D.},
  title     = {Fast and Adaptive Online Training of Feature-Rich Translation Models},
  booktitle = {ACL},
  year      = {2013},
}

@inproceedings{vaswani13decode,
  author    = {Ashish Vaswani and Yinggong Zhao and Victoria Fossum and David Chiang},
  title     = {Decoding with large-scale neural language models improves translation},
  booktitle = {EMNLP},
  year      = {2013},
}

@inproceedings{zou13bilingual,
  author    = {Will Y. Zou and Richard Socher and Daniel Cer and Christopher D. Manning},
  title     = {Bilingual Word Embeddings for Phrase-Based Machine Translation},
  booktitle = {EMNLP},
  year      = {2013},
}

% Written in this file's arXiv convention instead of DBLP's "CoRR, abs/<id>".
% The non-standard ee field is kept as an (ignored) annotation.
@article{mikolov13mt,
  author  = {Tom{\'{a}}{\v{s}} Mikolov and Quoc V. Le and Ilya Sutskever},
  title   = {Exploiting Similarities among Languages for Machine Translation},
  journal = {arXiv preprint arXiv:1309.4168},
  year    = {2013},
  ee      = {http://arxiv.org/abs/1309.4168},
}

@inproceedings{heafield13,
  author    = {Kenneth Heafield and Ivan Pouzyrevsky and Jonathan H. Clark and Philipp Koehn},
  title     = {Scalable Modified {Kneser-Ney} Language Model Estimation},
  booktitle = {ACL},
  year      = {2013},
}

% NOTE: same work as the entry keyed green-EtAl:2013:ACL2013 in this file;
% both keys are kept because either one may be cited.
@inproceedings{green13,
  author    = {Green, Spence and Wang, Sida and Cer, Daniel and Manning, Christopher D.},
  booktitle = {ACL},
  title     = {Fast and Adaptive Online Training of Feature-Rich Translation Models},
  year      = {2013},
}

@inproceedings{Schwenk12continuous,
  author    = {Holger Schwenk},
  title     = {Continuous Space Translation Models for Phrase-Based Statistical Machine Translation},
  booktitle = {COLING},
  year      = {2012},
}

% First author is Hai-Son Le (surname Le); the key is kept for existing citations.
% The cedilla is written as a LaTeX escape so 8-bit BibTeX handles the name.
@inproceedings{Son:2012:CST,
  author    = {Le, Hai-Son and Allauzen, Alexandre and Yvon, Fran{\c{c}}ois},
  title     = {Continuous Space Translation Models with Neural Networks},
  booktitle = {NAACL-HLT},
  year      = {2012},
}

@inproceedings{chiang09,
  author    = {Chiang, David and Knight, Kevin and Wang, Wei},
  booktitle = {NAACL},
  title     = {11,001 New Features for Statistical Machine Translation},
  year      = {2009},
}

@inproceedings{Pauls2011,
  author    = {Pauls, Adam and Klein, Dan},
  booktitle = {ACL},
  title     = {Faster and Smaller N-gram Language Models},
  year      = {2011},
}

% kenlm
% Colon kept outside the protecting braces so styles see the subtitle boundary
% and keep "Faster" capitalised.
@inproceedings{kenlm,
  author    = {Kenneth Heafield},
  title     = {{KenLM}: Faster and Smaller Language Model Queries},
  booktitle = {WMT},
  year      = {2011},
}

% Phrasal
@inproceedings{cer10phrasal,
  author    = {Daniel Cer and Michel Galley and Daniel Jurafsky and Christopher D. Manning},
  title     = {Phrasal: A statistical machine translation toolkit for exploring new model features},
  booktitle = {ACL, Demonstration Session},
  year      = {2010},
}

% cdec
@inproceedings{dyer10cdec,
  author    = {Chris Dyer and Jonathan Weese and Hendra Setiawan and Adam Lopez and
               Ferhan Ture and Vladimir Eidelman and Juri Ganitkevitch and
               Phil Blunsom and Philip Resnik},
  title     = {cdec: A decoder, alignment, and learning framework for finite-state and context-free translation models},
  booktitle = {ACL, Demonstration Session},
  year      = {2010},
}

@book{koehn10smt,
  author    = {Koehn, Philipp},
  title     = {Statistical Machine Translation},
  publisher = {Cambridge University Press},
  edition   = {1st},
  year      = {2010},
}

@misc{languages,
  author       = {Stephen R. Anderson},
  title        = {How many languages are there in the world?},
  howpublished = {\url{http://www.linguisticsociety.org/content/how-many-languages-are-there-world}},
  year         = {2010},
  note         = {Accessed: 2016-09-10},
}

@inproceedings{irstlm,
  author    = {Federico, Marcello and Bertoldi, Nicola and Cettolo, Mauro},
  title     = {{IRSTLM}: an open source toolkit for handling large scale language models},
  booktitle = {Interspeech},
  year      = {2008},
}

@inproceedings{galley08,
  author    = {Galley, Michel and Manning, Christopher D.},
  booktitle = {EMNLP},
  title     = {A Simple and Effective Hierarchical Phrase Reordering Model},
  year      = {2008},
}

% Journal name is "Computer Speech & Language" (singular "Language").
@article{schwenk07,
  author  = {Schwenk, Holger},
  title   = {Continuous Space Language Models},
  journal = {Computer Speech \& Language},
  volume  = {21},
  number  = {3},
  pages   = {492--518},
  year    = {2007},
}

@incollection{hutchins07,
  author       = {W. John Hutchins},
  title        = {Machine translation: A concise history},
  booktitle    = {Computer Aided Translation: Theory and Practice},
  editor       = {Chan Sin Wai},
  publisher    = {Chinese University of Hong Kong},
  year         = {2007},
  howpublished = {\url{http://www.hutchinsweb.me.uk/CUHK-2006.pdf}},
}


% Moses
@inproceedings{koehn2007moses,
  author    = {Koehn, Philipp and Hoang, Hieu and Birch, Alexandra and
               Callison-Burch, Chris and Federico, Marcello and Bertoldi, Nicola and
               Cowan, Brooke and Shen, Wade and Moran, Christine and
               Zens, Richard and others},
  title     = {Moses: Open source toolkit for statistical machine translation},
  booktitle = {ACL, Demonstration Session},
  year      = {2007},
}

% Hiero
% Page range uses the double hyphen (en dash).
@article{chiang07hiero,
  author  = {David Chiang},
  title   = {Hierarchical phrase-based translation},
  journal = {Computational Linguistics},
  volume  = {33},
  number  = {2},
  pages   = {201--228},
  year    = {2007},
}

@inproceedings{brants07,
  author    = {Thorsten Brants and Ashok C. Popat and Peng Xu and
               Franz J. Och and Jeffrey Dean},
  booktitle = {EMNLP},
  title     = {Large language models in machine translation},
  year      = {2007},
}

% Both accents written as BibTeX special characters ({\^o}, {\'e}).
@inproceedings{Liang:2006:EDA,
  author    = {Liang, Percy and Bouchard-C{\^o}t{\'e}, Alexandre and Klein, Dan and Taskar, Ben},
  title     = {An End-to-end Discriminative Approach to Machine Translation},
  booktitle = {ACL},
  year      = {2006},
}

% Berkeley aligner
@inproceedings{liang06alignment,
  author    = {Percy Liang and Ben Taskar and Dan Klein},
  title     = {Alignment by Agreement},
  booktitle = {NAACL},
  year      = {2006},
}

@inproceedings{chiang05,
  author    = {Chiang, David},
  booktitle = {ACL},
  title     = {A Hierarchical Phrase-based Model for Statistical Machine Translation},
  year      = {2005},
}

@article{och04,
  author  = {Och, Franz Josef and Ney, Hermann},
  title   = {The Alignment Template Approach to Statistical Machine Translation},
  journal = {Computational Linguistics},
  volume  = {30},
  number  = {4},
  pages   = {417--449},
  year    = {2004},
}


@inproceedings{Koehn:2003:SMT,
  author    = {Koehn, Philipp and Och, Franz Josef and Marcu, Daniel},
  title     = {Statistical Phrase-based Translation},
  booktitle = {NAACL},
  year      = {2003},
}

% Page range uses the double hyphen (en dash).
@article{och03,
  author  = {Och, Franz Josef and Ney, Hermann},
  title   = {A Systematic Comparison of Various Statistical Alignment Models},
  journal = {Computational Linguistics},
  volume  = {29},
  number  = {1},
  pages   = {19--51},
  year    = {2003},
}

@inproceedings{och03mert,
  author    = {Och, Franz Josef},
  booktitle = {ACL},
  title     = {Minimum Error Rate Training in Statistical Machine Translation},
  year      = {2003},
}

@inproceedings{och02,
  author    = {Och, Franz Josef and Ney, Hermann},
  booktitle = {ACL},
  title     = {Discriminative Training and Maximum Entropy Models for Statistical Machine Translation},
  year      = {2002},
}

@inproceedings{Marcu:2002,
  author    = {Marcu, Daniel and Wong, William},
  booktitle = {EMNLP},
  title     = {A Phrase-based, Joint Probability Model for Statistical Machine Translation},
  year      = {2002},
}

% SRILM
@inproceedings{srilm,
  author    = {Stolcke, Andreas},
  title     = {{SRILM} -- An Extensible Language Modeling Toolkit},
  booktitle = {ICSLP},
  year      = {2002},
}

@inproceedings{yamada01,
  author    = {Yamada, Kenji and Knight, Kevin},
  booktitle = {ACL},
  title     = {A Syntax-based Statistical Translation Model},
  year      = {2001},
}

% "Proceedings of the IEEE" is a journal (hence the volume), so this is an
% @article; the page range uses the double hyphen.
@article{Rosenfeld2000,
  author  = {Rosenfeld, Ronald},
  title   = {Two decades of statistical language modeling: Where do we go from here?},
  journal = {Proceedings of the {IEEE}},
  volume  = {88},
  number  = {8},
  pages   = {1270--1278},
  year    = {2000},
}

% Only the proper noun and acronym are braced instead of the whole title;
% the page range uses the double hyphen.
@incollection{hutchins2000early,
  author       = {W. John Hutchins},
  title        = {{Warren Weaver} and the launching of {MT}: brief biographical note},
  booktitle    = {Early Years in Machine Translation: Memoirs and Biographies of Pioneers},
  publisher    = {John Benjamins},
  pages        = {17--20},
  year         = {2000},
  howpublished = {\url{http://www.hutchinsweb.me.uk/Weaver-2000.pdf}},
}

@article{wu97,
  author  = {Wu, Dekai},
  title   = {Stochastic Inversion Transduction Grammars and Bilingual Parsing of Parallel Corpora},
  journal = {Computational Linguistics},
  volume  = {23},
  number  = {3},
  pages   = {377--403},
  year    = {1997},
}

% The surname is "Della Pietra": written before the comma so BibTeX does not
% treat "Della" as part of the given name when sorting or abbreviating.
@article{berger96,
  author  = {Berger, Adam L. and Della Pietra, Vincent J. and Della Pietra, Stephen A.},
  title   = {A Maximum Entropy Approach to Natural Language Processing},
  journal = {Computational Linguistics},
  volume  = {22},
  number  = {1},
  pages   = {39--71},
  year    = {1996},
}

% "the" SMT paper
% The surname is "Della Pietra" (written before the comma); the month uses the
% predefined jun macro rather than the literal string "06".
@article{Brown:1993:MSM,
  author  = {Brown, Peter F. and Della Pietra, Vincent J. and Della Pietra, Stephen A. and Mercer, Robert L.},
  title   = {The Mathematics of Statistical Machine Translation: Parameter Estimation},
  journal = {Computational Linguistics},
  volume  = {19},
  number  = {2},
  pages   = {263--311},
  month   = jun,
  year    = {1993},
}

@inproceedings{Papineni02bleu,
  author    = {Kishore Papineni and Salim Roukos and Todd Ward and Wei-jing Zhu},
  title     = {{BLEU}: a Method for Automatic Evaluation of Machine Translation},
  booktitle = {ACL},
  year      = {2002},
}

% A numbered newsletter issue is a periodical: @article with journal + number.
% As @incollection it had a number with no series and no publisher.
@article{ibm701,
  author       = {Peter Sheridan},
  title        = {Research in language translation on the {IBM} type 701},
  journal      = {IBM Technical Newsletter},
  number       = {9},
  year         = {1955},
  howpublished = {\url{http://mt-archive.info/Sheridan-1955.pdf}},
}

% Second editor's surname is Booth (A. Donald Booth).
@incollection{weaver49,
  author    = {Weaver, Warren},
  title     = {Translation},
  booktitle = {Machine Translation of Languages},
  editor    = {Locke, William N. and Booth, A. Donald},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {15--23},
  year      = {1949},
  note      = {Reprinted from a memorandum written by Weaver in 1949.},
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Sequence to Sequence Models / RNNs %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Title is no longer wrapped in an extra brace pair, so the style controls its
% casing like every other entry; surnames need no protective braces.
@article{dozat16,
  author  = {Dozat, Timothy and Manning, Christopher D.},
  title   = {Deep Biaffine Attention for Neural Dependency Parsing},
  journal = {arXiv preprint arXiv:1611.01734},
  year    = {2016},
}

% Dataset name braced so sentence-casing styles keep "CNN/Daily Mail" intact.
@inproceedings{chen16,
  author    = {Chen, Danqi and Bolton, Jason and Manning, Christopher D.},
  title     = {A Thorough Examination of the {CNN/Daily Mail} Reading Comprehension Task},
  booktitle = {ACL},
  year      = {2016},
}

@inproceedings{jia16,
  author    = {Robin Jia and Percy Liang},
  title     = {Data Recombination for Neural Semantic Parsing},
  booktitle = {ACL},
  year      = {2016},
}

@inproceedings{gulcehre16,
  author    = {{\c{C}}aglar G{\"{u}}l{\c{c}}ehre and Sungjin Ahn and Ramesh Nallapati and
               Bowen Zhou and Yoshua Bengio},
  booktitle = {ACL},
  title     = {Pointing the Unknown Words},
  year      = {2016},
}

@inproceedings{gu16,
  author    = {Jiatao Gu and Zhengdong Lu and Hang Li and Victor O. K. Li},
  booktitle = {ACL},
  title     = {Incorporating Copying Mechanism in Sequence-to-Sequence Learning},
  year      = {2016},
}

@inproceedings{wiseman16,
  author    = {Sam Wiseman and Alexander M. Rush},
  booktitle = {EMNLP},
  title     = {Sequence-to-Sequence Learning as Beam-Search Optimization},
  year      = {2016},
}

@inproceedings{shen16,
  author    = {Shiqi Shen and Yong Cheng and Zhongjun He and Wei He and
               Hua Wu and Maosong Sun and Yang Liu},
  booktitle = {ACL},
  title     = {Minimum Risk Training for Neural Machine Translation},
  year      = {2016},
}

@article{bahdanau16actor,
  author  = {Dzmitry Bahdanau and Philemon Brakel and Kelvin Xu and Anirudh Goyal and
             Ryan Lowe and Joelle Pineau and Aaron C. Courville and Yoshua Bengio},
  journal = {arXiv preprint arXiv:1607.07086},
  title   = {An Actor-Critic Algorithm for Sequence Prediction},
  year    = {2016},
}

@inproceedings{ranzato16,
  author    = {Marc'Aurelio Ranzato and Sumit Chopra and Michael Auli and Wojciech Zaremba},
  booktitle = {ICLR},
  title     = {Sequence Level Training with Recurrent Neural Networks},
  year      = {2016},
}


@inproceedings{li16,
  author    = {Li, Jiwei and Galley, Michel and Brockett, Chris and
               Spithourakis, Georgios and Gao, Jianfeng and Dolan, Bill},
  booktitle = {ACL},
  title     = {A Persona-Based Neural Conversation Model},
  year      = {2016},
}


@article{alrfou16,
  author  = {Rami Al{-}Rfou and Marc Pickett and Javier Snaider and
             Yun{-}Hsuan Sung and Brian Strope and Ray Kurzweil},
  journal = {arXiv preprint arXiv:1606.00372},
  title   = {Conversational Contextual Cues: The Case of Personalization and History
             for Response Ranking},
  year    = {2016},
}

@inproceedings{serban16,
  author    = {Iulian Vlad Serban and Alessandro Sordoni and Yoshua Bengio and
               Aaron C. Courville and Joelle Pineau},
  booktitle = {AAAI},
  title     = {Building End-To-End Dialogue Systems Using Generative Hierarchical
               Neural Network Models},
  year      = {2016},
}

@inproceedings{bengio15,
  author    = {Samy Bengio and Oriol Vinyals and Navdeep Jaitly and Noam M. Shazeer},
  title     = {Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks},
  booktitle = {NIPS},
  year      = {2015},
}


% No trailing period in the title; accent written as a BibTeX special character ({\'o}).
@inproceedings{jozefowicz15,
  author    = {J{\'o}zefowicz, Rafal and Zaremba, Wojciech and Sutskever, Ilya},
  title     = {An Empirical Exploration of Recurrent Network Architectures},
  booktitle = {ICML},
  year      = {2015},
}

@article{greff15,
  author  = {Klaus Greff and Rupesh Kumar Srivastava and Jan Koutn{\'{\i}}k and
             Bas R. Steunebrink and J{\"{u}}rgen Schmidhuber},
  journal = {arXiv preprint arXiv:1503.04069},
  title   = {{LSTM:} {A} Search Space Odyssey},
  year    = {2015},
}

% Title carries no trailing period; the bibliography style supplies punctuation.
@inproceedings{li15,
  author    = {Li, Jiwei and Luong, Minh-Thang and Jurafsky, Dan},
  title     = {A Hierarchical Neural Autoencoder for Paragraphs and Documents},
  booktitle = {ACL},
  year      = {2015},
}

@inproceedings{dai15,
  author    = {Andrew M. Dai and Quoc V. Le},
  title     = {Semi-supervised Sequence Learning},
  booktitle = {NIPS},
  year      = {2015},
}

@inproceedings{kiros15skip,
  author    = {Ryan Kiros and Yukun Zhu and Ruslan Salakhutdinov and Richard S. Zemel and
               Antonio Torralba and Raquel Urtasun and Sanja Fidler},
  title     = {Skip-Thought Vectors},
  booktitle = {NIPS},
  year      = {2015},
}

@inproceedings{vinyals15grammar,
  author    = {Vinyals, Oriol and Kaiser, Lukasz and Koo, Terry and
               Petrov, Slav and Sutskever, Ilya and Hinton, Geoffrey},
  title     = {Grammar as a foreign language},
  booktitle = {NIPS},
  year      = {2015},
}

@inproceedings{draw15,
  author    = {Karol Gregor and Ivo Danihelka and Alex Graves and
               Danilo Jimenez Rezende and Daan Wierstra},
  title     = {{DRAW:} {A} Recurrent Neural Network For Image Generation},
  booktitle = {ICML},
  year      = {2015},
}

@inproceedings{xu15,
  author    = {Kelvin Xu and Jimmy Ba and Ryan Kiros and Kyunghyun Cho and
               Aaron C. Courville and Ruslan Salakhutdinov and
               Richard S. Zemel and Yoshua Bengio},
  title     = {Show, Attend and Tell: Neural Image Caption Generation with Visual Attention},
  booktitle = {ICML},
  year      = {2015},
}
@inproceedings{vinyals15caption,
  author    = {Vinyals, Oriol and Toshev, Alexander and Bengio, Samy and Erhan, Dumitru},
  title     = {Show and tell: A neural image caption generator},
  booktitle = {CVPR},
  year      = {2015},
}

% Conference paper: @inproceedings like the other NIPS entries in this file
% (@incollection would also require a publisher, which is absent).
@inproceedings{mnih14,
  author    = {Mnih, Volodymyr and Heess, Nicolas and Graves, Alex and Kavukcuoglu, Koray},
  title     = {Recurrent Models of Visual Attention},
  booktitle = {NIPS},
  year      = {2014},
}
@article{graves14,
  author  = {Alex Graves and Greg Wayne and Ivo Danihelka},
  title   = {Neural {Turing} Machines},
  journal = {arXiv preprint arXiv:1410.5401},
  year    = {2014},
}

@article{zaremba14,
  author  = {Wojciech Zaremba and Ilya Sutskever and Oriol Vinyals},
  title   = {Recurrent Neural Network Regularization},
  journal = {arXiv preprint arXiv:1409.2329},
  year    = {2014},
}

% Accents written as BibTeX special characters ({\'e}, {\^o}).
@inproceedings{pham2014dropout,
  author    = {Pham, Vu and Bluche, Th{\'e}odore and Kermorvant, Christopher and
               Louradour, J{\'e}r{\^o}me},
  title     = {Dropout improves recurrent neural networks for handwriting recognition},
  booktitle = {ICFHR},
  year      = {2014},
}

% arXiv preprint, not a proceedings paper: written like the other arXiv
% entries in this file (@article with the id in journal).
@article{graves13c,
  author  = {Alex Graves},
  title   = {Generating Sequences With Recurrent Neural Networks},
  journal = {arXiv preprint arXiv:1308.0850},
  year    = {2013},
}

% Title carries no trailing period; the bibliography style supplies punctuation.
@inproceedings{bengio13,
  author    = {Bengio, Yoshua and Boulanger-Lewandowski, Nicolas and Pascanu, Razvan},
  title     = {Advances in optimizing recurrent networks},
  booktitle = {ICASSP},
  year      = {2013},
}

@inproceedings{pascanu13,
  author    = {Razvan Pascanu and Tom{\'{a}}{\v{s}} Mikolov and Yoshua Bengio},
  booktitle = {ICML},
  title     = {On the difficulty of training recurrent neural networks},
  year      = {2013},
}

@inproceedings{mikolovLM,
  author    = {Mikolov, Tom{\'{a}}{\v{s}} and Zweig, Geoffrey},
  title     = {Context dependent recurrent neural network language model},
  booktitle = {SLT},
  year      = {2012},
}

@inproceedings{MikolovKBCK11,
  author    = {Mikolov, Tom{\'{a}}{\v{s}} and Kombrink, Stefan and Burget, Luk{\'{a}}{\v{s}} and {\v{C}}ernock{\'{y}}, Jan and Khudanpur, Sanjeev},
  title     = {Extensions of recurrent neural network language model},
  booktitle = {ICASSP},
  year      = {2011},
}

@inproceedings{Sutskever11,
  author    = {Ilya Sutskever and James Martens and Geoffrey Hinton},
  title     = {Generating Text with Recurrent Neural Networks},
  booktitle = {ICML},
  year      = {2011},
}

@inproceedings{MikolovKBCK10,
  author    = {Mikolov, Tom{\'{a}}{\v{s}} and Karafi{\'{a}}t, Martin and Burget, Luk{\'{a}}{\v{s}} and {\v{C}}ernock{\'{y}}, Jan and Khudanpur, Sanjeev},
  title     = {Recurrent neural network based language model},
  booktitle = {Interspeech},
  year      = {2010},
}

@inproceedings{graves09,
  author    = {Graves, Alex and Schmidhuber, J{\"{u}}rgen},
  title     = {Offline Handwriting Recognition with Multidimensional Recurrent Neural Networks},
  booktitle = {NIPS},
  year      = {2009},
}

@article{graves05,
  author  = {Alex Graves and J{\"{u}}rgen Schmidhuber},
  title   = {Framewise phoneme classification with bidirectional {LSTM} and other neural network architectures},
  journal = {Neural Networks},
  volume  = {18},
  number  = {5--6},
  pages   = {602--610},
  year    = {2005},
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Deep Learning / Machine Learning %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@inproceedings{norouzi16,
  author    = {Mohammad Norouzi and Samy Bengio and Zhifeng Chen and Navdeep Jaitly and Mike Schuster and Yonghui Wu and Dale Schuurmans},
  booktitle = {NIPS},
  title     = {Reward Augmented Maximum Likelihood for Neural Structured Prediction},
  year      = {2016},
}

@article{gupta15,
  author  = {Suyog Gupta and Ankur Agrawal and Kailash Gopalakrishnan and Pritish Narayanan},
  journal = {arXiv preprint 1502.02551},
  title   = {Deep Learning with Limited Numerical Precision},
  year    = {2015},
}

@misc{karpathy15rnn,
  author       = {Andrej Karpathy},
  title        = {The Unreasonable Effectiveness of Recurrent Neural Networks},
  howpublished = {\url{http://karpathy.github.io/2015/05/21/rnn-effectiveness/}},
  type         = {Blog},
  year         = {2015},
  note         = {Accessed: 2016-07-05},
}

@article{DBLP:journals/corr/GuptaAGN15,
  author    = {Suyog Gupta and Ankur Agrawal and Kailash Gopalakrishnan and Pritish Narayanan},
  title     = {Deep Learning with Limited Numerical Precision},
  journal   = {CoRR},
  volume    = {abs/1502.02551},
  year      = {2015},
  url       = {http://arxiv.org/abs/1502.02551},
  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/GuptaAGN15},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  timestamp = {Mon, 02 Mar 2015 14:17:34 +0100},
}

@inproceedings{bhsne,
  author    = {Laurens van der Maaten},
  title     = {{Barnes-Hut-SNE}},
  booktitle = {ICLR},
  year      = {2013},
}


@inproceedings{kriz12,
  author    = {A. Krizhevsky and I. Sutskever and G.~E. Hinton},
  title     = {{ImageNet} Classification with Deep Convolutional Neural Networks},
  booktitle = {NIPS},
  year      = {2012},
}

@article{hinton12,
  author  = {G. Hinton and L. Deng and D. Yu and G. Dahl and A. Mohamed and N. Jaitly and A. Senior and V. Vanhoucke and P. Nguyen and T. Sainath and B. Kingsbury},
  title   = {Deep Neural Networks for Acoustic Modeling in Speech Recognition},
  journal = {IEEE Signal Processing Magazine},
  year    = {2012},
}

@phdthesis{mikolov12,
  author = {Tom{\'{a}}{\v{s}} Mikolov},
  title  = {Statistical Language Models Based on Neural Networks},
  school = {Brno University of Technology},
  year   = {2012},
}

@phdthesis{sutskever12,
  author = {Ilya Sutskever},
  title  = {Training Recurrent Neural Networks},
  school = {University of Toronto},
  year   = {2012},
}


@article{gutmann12nce,
  author  = {Michael Gutmann and Aapo Hyv{\"a}rinen},
  title   = {Noise-Contrastive Estimation of Unnormalized Statistical Models, with Applications to Natural Image Statistics},
  journal = {JMLR},
  volume  = {13},
  pages   = {307--361},
  year    = {2012},
}

@inproceedings{MartensS11,
  author    = {James Martens and Ilya Sutskever},
  booktitle = {ICML},
  title     = {Learning Recurrent Neural Networks with {Hessian}-Free Optimization},
  year      = {2011},
}

@article{duchi11adagrad,
  author  = {John C. Duchi and Elad Hazan and Yoram Singer},
  title   = {Adaptive Subgradient Methods for Online Learning and Stochastic Optimization},
  journal = {JMLR},
  volume  = {12},
  pages   = {2121--2159},
  year    = {2011},
}

@inproceedings{Ratliff_subgradient,
  author    = {Ratliff, Nathan D. and Bagnell, J. Andrew and Zinkevich, Martin A.},
  title     = {Online Subgradient Methods for Structured Prediction},
  booktitle = {AISTATS},
  year      = {2007},
}

@article{Jaeger2007,
  author  = {Jaeger, Herbert and Luko{\v{s}}evi{\v{c}}ius, Mantas and Popovici, Dan and Siewert, Udo},
  title   = {Optimization and applications of echo state networks with leaky-integrator neurons},
  journal = {Neural Networks},
  volume  = {20},
  number  = {3},
  pages   = {335--352},
  year    = {2007},
}

@book{optimization,
  author    = {Nocedal, Jorge and Wright, Stephen J.},
  title     = {Numerical Optimization},
  edition   = {Second},
  publisher = {Springer},
  address   = {New York, NY},
  year      = {2006},
}


@inproceedings{Teh2006,
  author    = {Teh, Yee Whye},
  booktitle = {ACL},
  title     = {A Hierarchical {Bayesian} Language Model Based on {Pitman-Yor} Processes},
  year      = {2006},
}


@inproceedings{Smith:2005:CET,
  author    = {Smith, Noah A. and Eisner, Jason},
  title     = {Contrastive estimation: training log-linear models on unlabeled data},
  booktitle = {ACL},
  year      = {2005},
}

@inproceedings{lafferty01,
  author    = {Lafferty, John D. and McCallum, Andrew and Pereira, Fernando C. N.},
  booktitle = {ICML},
  title     = {Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data},
  year      = {2001},
}

@article{Gers00,
  author    = {Gers, Felix A. and Schmidhuber, J{\"u}rgen A. and Cummins, Fred A.},
  title     = {Learning to Forget: Continual Prediction with {LSTM}},
  journal   = {Neural Computation},
  volume    = {12},
  number    = {10},
  pages     = {2451--2471},
  year      = {2000},
  publisher = {MIT Press},
}

@article{lstm97,
  author  = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title   = {Long Short-Term Memory},
  journal = {Neural Computation},
  volume  = {9},
  number  = {8},
  pages   = {1735--1780},
  year    = {1997},
}

@article{goller96,
  author  = {Goller, Christopher and K{\"u}chler, Andreas},
  title   = {Learning Task-Dependent Distributed Representations by Backpropagation Through Structure},
  journal = {IEEE Transactions on Neural Networks},
  volume  = {1},
  pages   = {347--352},
  year    = {1996},
}


@article{lin96,
  author  = {Tsungnan Lin and Bill G. Horne and Peter Ti{\v{n}}o and C. Lee Giles},
  title   = {Learning long-term dependencies in {NARX} recurrent neural networks},
  journal = {IEEE Transactions on Neural Networks},
  volume  = {7},
  number  = {6},
  pages   = {1329--1338},
  year    = {1996},
}

@inproceedings{el96,
  author    = {El Hihi, Salah and Bengio, Yoshua},
  title     = {Hierarchical Recurrent Neural Networks for Long-Term Dependencies},
  booktitle = {NIPS},
  year      = {1996},
}
@article{Bengio-trnn94,
  author  = {Bengio, Yoshua and Simard, Patrice and Frasconi, Paolo},
  title   = {Learning Long-Term Dependencies with Gradient Descent is Difficult},
  journal = {IEEE Transactions on Neural Networks},
  volume  = {5},
  number  = {2},
  pages   = {157--166},
  year    = {1994},
}

@techreport{bishop94,
  author      = {Christopher M. Bishop},
  title       = {Mixture density networks},
  institution = {Aston University},
  year        = {1994},
}

@article{reinforce,
  author  = {Ronald J. Williams},
  title   = {Simple statistical gradient-following algorithms for connectionist reinforcement learning},
  journal = {Machine Learning},
  volume  = {8},
  number  = {3--4},
  pages   = {229--256},
  year    = {1992},
}

@article{elman90,
  author  = {Jeffrey L. Elman},
  title   = {Finding structure in time},
  journal = {Cognitive Science},
  volume  = {14},
  number  = {2},
  pages   = {179--211},
  year    = {1990},
}

@article{werbos1990,
  author  = {Paul J. Werbos},
  title   = {Back propagation through time: What it does and how to do it},
  journal = {Proceedings of the IEEE},
  volume  = {78},
  number  = {10},
  pages   = {1550--1560},
  year    = {1990},
}

@incollection{waibel90,
  author    = {Waibel, Alexander and Hanazawa, Toshiyuki and Hinton, Geoffrey and Shikano, Kiyohiro and Lang, Kevin J.},
  title     = {Phoneme Recognition Using Time-delay Neural Networks},
  booktitle = {Readings in Speech Recognition},
  editor    = {Waibel, Alex and Lee, Kai-Fu},
  publisher = {Morgan Kaufmann},
  pages     = {393--404},
  year      = {1990},
  isbn      = {1-55860-124-4},
}

@incollection{Rumelhart:1986:LPT,
  author    = {Rumelhart, David E. and McClelland, James L.},
  title     = {On Learning the Past Tenses of {English} Verbs},
  booktitle = {Parallel Distributed Processing. {Volume 2}: Psychological and Biological Models},
  editor    = {McClelland, J. L. and Rumelhart, D. E. and {{PDP Research Group}}},
  publisher = {MIT Press},
  pages     = {216--271},
  year      = {1986},
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Natural Language Processing %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% coherence corpus
@inproceedings{smith15,
  author    = {Sim Smith, Karin and Aziz, Wilker and Specia, Lucia},
  booktitle = {Second Workshop on Discourse in Machine Translation},
  title     = {A Proposal for a Coherence Corpus in Machine Translation},
  year      = {2015},
}

@book{Jurafsky:2009,
  author    = {Jurafsky, Daniel and Martin, James H.},
  title     = {Speech and Language Processing},
  edition   = {Second},
  publisher = {Prentice-Hall, Inc.},
  address   = {Upper Saddle River, NJ, USA},
  year      = {2009},
  isbn      = {0131873210},
}


% Using Amazon Mechanical Turk for NLP annotation
@inproceedings{Snow:2008:CFG,
  author    = {Snow, Rion and O'Connor, Brendan and Jurafsky, Daniel and Ng, Andrew Y.},
  title     = {Cheap and fast---but is it good?: {Evaluating} non-expert annotations for natural language tasks},
  booktitle = {EMNLP},
  year      = {2008},
}

% LDA
@article{Blei:2003:LDA,
  author  = {Blei, David M. and Ng, Andrew Y. and Jordan, Michael I.},
  title   = {Latent Dirichlet Allocation},
  journal = {Journal of Machine Learning Research},
  volume  = {3},
  pages   = {993--1022},
  year    = {2003},
}

% RST to MT
@inproceedings{marcu2000,
  author    = {Marcu, Daniel and Carlson, Lynn and Watanabe, Maki},
  booktitle = {NAACL},
  title     = {The Automatic Translation of Discourse Structures},
  year      = {2000},
}

% PTB
@article{Marcus:1993:BLA,
  author  = {Marcus, Mitchell P. and Marcinkiewicz, Mary Ann and Santorini, Beatrice},
  title   = {Building a Large Annotated Corpus of {English}: The {Penn} Treebank},
  journal = {Computational Linguistics},
  volume  = {19},
  number  = {2},
  pages   = {313--330},
  year    = {1993},
}

% Brown clustering
@article{Brown:1992:CNG,
  author  = {Brown, Peter F. and deSouza, Peter V. and Mercer, Robert L. and Della Pietra, Vincent J. and Lai, Jenifer C.},
  title   = {Class-based N-gram Models of Natural Language},
  journal = {Computational Linguistics},
  volume  = {18},
  number  = {4},
  pages   = {467--479},
  year    = {1992},
}

% RST
@article{mann1988,
  author  = {Mann, William C. and Thompson, Sandra A.},
  title   = {Rhetorical structure theory: Toward a functional theory of text organization},
  journal = {Text},
  volume  = {8},
  number  = {3},
  pages   = {243--281},
  year    = {1988},
}


@article{katz87,
  author  = {Slava Katz},
  title   = {Estimation of probabilities from sparse data for the language model component of a speech recognizer},
  journal = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
  volume  = {35},
  number  = {3},
  pages   = {400--401},
  year    = {1987},
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Character-level NLP %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%
@inproceedings{chan16,
  author    = {William Chan and Navdeep Jaitly and Quoc V. Le and Oriol Vinyals},
  title     = {Listen, Attend and Spell},
  booktitle = {ICASSP},
  year      = {2016},
}

@inproceedings{bahdanau16,
  author    = {Dzmitry Bahdanau and Jan Chorowski and Dmitriy Serdyuk and Philemon Brakel and Yoshua Bengio},
  title     = {End-to-End Attention-based Large Vocabulary Speech Recognition},
  booktitle = {ICASSP},
  year      = {2016},
}

@inproceedings{ling15function,
  author    = {Ling, Wang and Dyer, Chris and Black, Alan W. and Trancoso, Isabel and Fernandez, Ramon and Amir, Silvio and Marujo, Lu{\'{\i}}s and Lu{\'{\i}}s, Tiago},
  title     = {Finding Function in Form: Compositional Character Models for Open Vocabulary Word Representation},
  booktitle = {EMNLP},
  year      = {2015},
}

@inproceedings{ballesteros15,
  author    = {Ballesteros, Miguel and Dyer, Chris and Smith, Noah A.},
  title     = {Improved Transition-based Parsing by Modeling Characters instead of Words with {LSTMs}},
  booktitle = {EMNLP},
  year      = {2015},
}

@article{rafal16,
  author  = {Rafal Jozefowicz and Oriol Vinyals and Mike Schuster and Noam Shazeer and Yonghui Wu},
  title   = {Exploring the Limits of Language Modeling},
  journal = {arXiv preprint arXiv:1602.02410},
  year    = {2016},
}

@inproceedings{zhang15,
  author    = {Xiang Zhang and Junbo Zhao and Yann LeCun},
  title     = {Character-level Convolutional Networks for Text Classification},
  booktitle = {NIPS},
  year      = {2015},
}

@inproceedings{kim16,
  author    = {Yoon Kim and Yacine Jernite and David Sontag and Alexander M. Rush},
  title     = {Character-Aware Neural Language Models},
  booktitle = {AAAI},
  year      = {2016},
}

@inproceedings{santos14,
  author    = {C{\'{\i}}cero Nogueira dos Santos and Bianca Zadrozny},
  booktitle = {ICML},
  title     = {Learning Character-level Representations for Part-of-Speech Tagging},
  year      = {2014},
}

%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Deep Learning NLP %%%
%%%%%%%%%%%%%%%%%%%%%%%%%
@inproceedings{pennington2014,
  author    = {Jeffrey Pennington and Richard Socher and Christopher D. Manning},
  title     = {{GloVe}: Global Vectors for Word Representation},
  booktitle = {EMNLP},
  year      = {2014},
}

@inproceedings{chen2014,
  author    = {Chen, Danqi and Manning, Christopher D.},
  title     = {A Fast and Accurate Dependency Parser using Neural Networks},
  booktitle = {EMNLP},
  year      = {2014},
}

@inproceedings{Socher-etal:2013:sentiment,
  author    = {Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning, Christopher D. and Ng, Andrew Y. and Potts, Christopher},
  title     = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
  booktitle = {EMNLP},
  year      = {2013},
}

@inproceedings{SocherEtAl2013:CVG,
  author    = {Richard Socher and John Bauer and Christopher D. Manning and Andrew Y. Ng},
  title     = {Parsing With Compositional Vector Grammars},
  booktitle = {ACL},
  year      = {2013},
}

@inproceedings{mikolov13iclr,
  author    = {Tom{\'{a}}{\v{s}} Mikolov and Kai Chen and Greg Corrado and Jeffrey Dean},
  title     = {Efficient Estimation of Word Representations in Vector Space},
  booktitle = {ICLR},
  year      = {2013},
}

@inproceedings{mnih13nce,
  author    = {Mnih, Andriy and Kavukcuoglu, Koray},
  title     = {Learning word embeddings efficiently with noise-contrastive estimation},
  booktitle = {NIPS},
  year      = {2013},
}

@inproceedings{mikolov13nips,
  author    = {Tom{\'{a}}{\v{s}} Mikolov and Ilya Sutskever and Kai Chen and Greg Corrado and Jeffrey Dean},
  title     = {Distributed Representations of Words and Phrases and their Compositionality},
  booktitle = {NIPS},
  year      = {2013},
}

@inproceedings{luong13,
  author    = {Luong, Minh-Thang and Socher, Richard and Manning, Christopher D.},
  title     = {Better Word Representations with Recursive Neural Networks for Morphology},
  booktitle = {CoNLL},
  year      = {2013},
}

@inproceedings{mikolov13regularities,
  author    = {Mikolov, Tom{\'{a}}{\v{s}} and Yih, Wen-tau and Zweig, Geoffrey},
  title     = {Linguistic Regularities in Continuous Space Word Representations},
  booktitle = {NAACL-HLT},
  year      = {2013},
}

@inproceedings{MnihTeh2012,
  author    = {Mnih, Andriy and Teh, Yee Whye},
  title     = {A fast and simple algorithm for training neural probabilistic language models},
  booktitle = {ICML},
  year      = {2012},
}

@inproceedings{Socher2012,
  author    = {R. Socher and B. Huval and C. D. Manning and A. Y. Ng},
  title     = {Semantic Compositionality Through Recursive Matrix-Vector Spaces},
  booktitle = {EMNLP},
  year      = {2012},
}

@article{Collobert2011jmlr,
  author  = {R. Collobert and J. Weston and L. Bottou and M. Karlen and K. Kavukcuoglu and P. Kuksa},
  title   = {Natural Language Processing (Almost) from Scratch},
  journal = {JMLR},
  volume  = {12},
  pages   = {2493--2537},
  year    = {2011},
}

@inproceedings{Socher2011,
  author    = {R. Socher and E. H. Huang and J. Pennington and A. Y. Ng and C. D. Manning},
  title     = {Dynamic Pooling and Unfolding Recursive Autoencoders for Paraphrase Detection},
  booktitle = {{NIPS}},
  year      = {2011},
}

@inproceedings{Socher2011b,
  author    = {R. Socher and J. Pennington and E. H. Huang and A. Y. Ng and C. D. Manning},
  title     = {Semi-Supervised Recursive Autoencoders for Predicting Sentiment Distributions},
  booktitle = {EMNLP},
  year      = {2011},
}

@inproceedings{SocherEtAl2011:RNN,
  author    = {R. Socher and Cliff C. Lin and A. Y. Ng and C. D. Manning},
  title     = {Parsing Natural Scenes and Natural Language with Recursive Neural Networks},
  booktitle = {ICML},
  year      = {2011},
}

@inproceedings{Glorot2011,
  author    = {Glorot, X. and Bordes, A. and Bengio, Y.},
  title     = {Domain Adaptation for Large-Scale Sentiment Classification: A Deep Learning Approach},
  booktitle = {ICML},
  year      = {2011},
}

@inproceedings{collobert2011distriminateParsing,
  author    = {R. Collobert},
  title     = {Deep Learning for Efficient Discriminative Parsing},
  booktitle = {AISTATS},
  year      = {2011},
}

@inproceedings{Socher10Phrase,
  author    = {Socher, Richard and Manning, Christopher and Ng, Andrew},
  title     = {Learning Continuous Phrase Representations and Syntactic Parsing with Recursive Neural Networks},
  booktitle = {NIPS*2010 Workshop on Deep Learning and Unsupervised Feature Learning},
  year      = {2010},
}

@inproceedings{Turian2010,
  author    = {Turian, J. and Ratinov, L. and Bengio, Y.},
  title     = {Word representations: a simple and general method for semi-supervised learning},
  booktitle = {ACL},
  year      = {2010},
}


@inproceedings{MnihHinton2009,
  author    = {Mnih, Andriy and Hinton, Geoffrey},
  title     = {A Scalable Hierarchical Distributed Language Model},
  booktitle = {NIPS},
  year      = {2009},
}

@inproceedings{Collobert2008,
  author    = {R. Collobert and J. Weston},
  title     = {A unified architecture for natural language processing: deep neural networks with multitask learning},
  booktitle = {ICML},
  year      = {2008},
}

@article{Bengio08,
  author  = {Bengio, Yoshua and Sen{\'e}cal, Jean-S{\'e}bastien},
  title   = {Adaptive Importance Sampling to Accelerate Training of a Neural Probabilistic Language Model},
  journal = {IEEE Trans. Neural Networks},
  volume  = {19},
  number  = {4},
  pages   = {713--722},
  year    = {2008},
}

@inproceedings{Mnih2007,
  author    = {Mnih, Andriy and Hinton, Geoffrey},
  title     = {Three new graphical models for statistical language modelling},
  booktitle = {ICML},
  year      = {2007},
}

@inproceedings{Morin2005,
  author    = {Morin, Frederic and Bengio, Yoshua},
  title     = {Hierarchical probabilistic neural network language model},
  booktitle = {AISTATS},
  year      = {2005},
}

@article{Bengio2003,
  author  = {Yoshua Bengio and R{\'e}jean Ducharme and Pascal Vincent and Christian Jauvin},
  title   = {A neural probabilistic language model},
  journal = {JMLR},
  volume  = {3},
  pages   = {1137--1155},
  year    = {2003},
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Multi-task learning %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%
@inproceedings{bordes12,
  author    = {Bordes, Antoine and Glorot, Xavier and Weston, Jason and Bengio, Yoshua},
  title     = {Joint Learning of Words and Meaning Representations for Open-Text Semantic Parsing},
  booktitle = {AISTATS},
  year      = {2012},
}

@inproceedings{chen14joint,
  author    = {Chen, Dongpeng and Mak, Brian and Leung, Cheung-Chi and Sivadas, Sunil},
  title     = {Joint acoustic modeling of triphones and trigraphemes by multi-task learning deep neural networks for low-resource speech recognition},
  booktitle = {ICASSP},
  year      = {2014},
}

@inproceedings{almeida13,
  author    = {Almeida, Miguel and Martins, Andr{\'e} F. T.},
  title     = {Fast and Robust Compressive Summarization with Dual Decomposition and Multi-Task Learning},
  booktitle = {ACL},
  year      = {2013},
}

@article{ando05,
  author  = {Ando, Rie Kubota and Zhang, Tong},
  title   = {A Framework for Learning Predictive Structures from Multiple Tasks and Unlabeled Data},
  journal = {JMLR},
  volume  = {6},
  pages   = {1817--1853},
  year    = {2005},
}

@inproceedings{kumar12,
  author    = {Kumar, Abhishek and Daum{\'e} III, Hal},
  title     = {Learning Task Grouping and Overlap in Multi-task Learning},
  booktitle = {ICML},
  year      = {2012},
}

@inproceedings{donahue14,
  author    = {Jeff Donahue and Yangqing Jia and Oriol Vinyals and Judy Hoffman and Ning Zhang and Eric Tzeng and Trevor Darrell},
  title     = {{DeCAF}: A Deep Convolutional Activation Feature for Generic Visual Recognition},
  booktitle = {ICML},
  year      = {2014},
}

@inproceedings{huang2013cross,
  author    = {Huang, Jui-Ting and Li, Jinyu and Yu, Dong and Deng, Li and Gong, Yifan},
  title     = {Cross-language knowledge transfer using multilingual deep neural network with shared hidden layers},
  booktitle = {ICASSP},
  year      = {2013},
}

@inproceedings{razavian2014cnn,
  author    = {Razavian, Ali S and Azizpour, Hossein and Sullivan, Josephine and Carlsson, Stefan},
  title     = {{CNN} features off-the-shelf: an astounding baseline for recognition},
  booktitle = {CVPR DeepVision workshop},
  year      = {2014},
}

@inproceedings{heigold13,
  author    = {Heigold, Georg and Vanhoucke, Vincent and Senior, Alan and Nguyen, Patrick and Ranzato, Marc'Aurelio and Devin, Matthieu and Dean, Jeffrey},
  title     = {Multilingual acoustic models using distributed deep neural networks},
  booktitle = {ICASSP},
  year      = {2013},
}

@inproceedings{deng13,
  author    = {Deng, Li and Li, Jinyu and Huang, Jui-Ting and Yao, Kaisheng and Yu, Dong and Seide, Frank and Seltzer, Mike and Zweig, Geoffrey and He, Xiaodong and Williams, Julia and others},
  title     = {Recent advances in deep learning for speech research at Microsoft},
  booktitle = {ICASSP},
  year      = {2013},
}

@article{argyriou08,
  author  = {Argyriou, Andreas and Evgeniou, Theodoros and Pontil, Massimiliano},
  title   = {Convex multi-task feature learning},
  journal = {Machine Learning},
  volume  = {73},
  number  = {3},
  pages   = {243--272},
  year    = {2008},
}

@inproceedings{evgeniou04,
  author    = {Evgeniou, Theodoros and Pontil, Massimiliano},
  title     = {Regularized multi-task learning},
  booktitle = {SIGKDD},
  year      = {2004},
}

@inproceedings{zhang12robust,
  author    = {Tianzhu Zhang and Bernard Ghanem and Si Liu and Narendra Ahuja},
  title     = {Robust visual tracking via multi-task sparse learning},
  booktitle = {CVPR},
  year      = {2012},
}

@article{yuan12,
  author  = {Xiao{-}Tong Yuan and Xiaobai Liu and Shuicheng Yan},
  title   = {Visual Classification With Multitask Joint Sparse Representation},
  journal = {{IEEE} Transactions on Image Processing},
  volume  = {21},
  number  = {10},
  year    = {2012},
}

@inproceedings{jalali10,
  author    = {Jalali, Ali and Sanghavi, Sujay and Ruan, Chao and Ravikumar, Pradeep K.},
  title     = {A Dirty Model for Multi-task Learning},
  booktitle = {NIPS},
  year      = {2010},
}

@article{zhang12convex,
  author  = {Zhang, Yu and Yeung, Dit-Yan},
  title   = {A Convex Formulation for Learning Task Relationships in Multi-Task Learning},
  journal = {CoRR},
  volume  = {abs/1203.3536},
  year    = {2012},
}

@inproceedings{neubig15,
  author    = {Graham Neubig and Philip Arthur and Kevin Duh},
  title     = {Multi-Target Machine Translation with Multi-Synchronous Context-free Grammars},
  booktitle = {NAACL},
  year      = {2015},
}

@inproceedings{thrun96,
  author    = {Sebastian Thrun},
  title     = {Is Learning The $n$-th Thing Any Easier Than Learning The First?},
  booktitle = {NIPS},
  year      = {1996},
}

@inproceedings{argyriou07,
  author    = {Andreas Argyriou and Theodoros Evgeniou and Massimiliano Pontil},
  title     = {Multi-task feature learning},
  booktitle = {NIPS},
  year      = {2007},
}

@inproceedings{lounici09,
  author    = {Lounici, K. and Pontil, M. and Tsybakov, A. B. and Van De Geer, S.},
  title     = {Taking advantage of sparsity in multi-task learning},
  booktitle = {COLT},
  year      = {2009},
}

@article{caruana97,
  author  = {Caruana, Rich},
  title   = {Multitask Learning},
  journal = {Machine Learning},
  volume  = {28},
  number  = {1},
  pages   = {41--75},
  year    = {1997},
}

@inproceedings{liu15,
  author    = {Xiaodong Liu and Jianfeng Gao and Xiaodong He and Li Deng and Kevin Duh and Ye-Yi Wang},
  title     = {Representation Learning Using Multi-Task Deep Neural Networks for Semantic Classification and Information Retrieval},
  booktitle = {NAACL},
  year      = {2015},
}

@inproceedings{dong15,
  author    = {Dong, Daxiang and Wu, Hua and He, Wei and Yu, Dianhai and Wang, Haifeng},
  title     = {Multi-Task Learning for Multiple Language Translation},
  booktitle = {ACL},
  year      = {2015},
}

% ---------------------- PRUNING ----------------------
@article{le2015simple,
  author  = {Le, Quoc V. and Jaitly, Navdeep and Hinton, Geoffrey E.},
  title   = {A simple way to initialize recurrent networks of rectified linear units},
  journal = {arXiv preprint arXiv:1504.00941},
  year    = {2015},
}

@inproceedings{han2015learning,
  author    = {Han, Song and Pool, Jeff and Tran, John and Dally, William},
  title     = {Learning both Weights and Connections for Efficient Neural Network},
  booktitle = {NIPS},
  year      = {2015},
}

@inproceedings{han2015deep,
  author    = {Han, Song and Mao, Huizi and Dally, William J.},
  title     = {Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding},
  booktitle = {ICLR},
  year      = {2016},
}

@inproceedings{murray2015auto,
  author    = {Murray, Kenton and Chiang, David},
  title     = {Auto-Sizing Neural Networks: With Applications to n-gram Language Models},
  booktitle = {EMNLP},
  year      = {2015},
}

@inproceedings{hassibi1993second,
  author    = {Hassibi, Babak and Stork, David G.},
  title     = {Second order derivatives for network pruning: Optimal brain surgeon},
  booktitle = {NIPS},
  publisher = {Morgan Kaufmann},
  year      = {1993},
}

@inproceedings{lecun1989optimal,
  author    = {Le Cun, Yann and Denker, John S and Solla, Sara A},
  title     = {Optimal Brain Damage},
  booktitle = {NIPS},
  year      = {1989},
}

@article{collins2014memory,
  author  = {Collins, Maxwell D and Kohli, Pushmeet},
  title   = {Memory bounded deep convolutional networks},
  journal = {arXiv preprint arXiv:1412.1442},
  year    = {2014},
}

@inproceedings{srinivas2015data,
  author    = {Srinivas, Suraj and Babu, R Venkatesh},
  title     = {Data-free parameter pruning for Deep Neural Networks},
  booktitle = {BMVC},
  year      = {2015},
}

@article{augasta2013pruning,
  author    = {Augasta, M. Gethsiyal and Kathirvalavakumar, T},
  title     = {Pruning algorithms of neural networks - a comparative study},
  journal   = {Central European Journal of Computer Science},
  volume    = {3},
  number    = {3},
  pages     = {105--115},
  year      = {2013},
  publisher = {Springer},
}

% ---------------------- GENERAL COMPRESSION ----------------------


@inproceedings{lin2015neural,
  author    = {Lin, Zhouhan and Courbariaux, Matthieu and Memisevic, Roland and Bengio, Yoshua},
  title     = {Neural Networks with Few Multiplications},
  booktitle = {ICLR},
  year      = {2016},
}

@inproceedings{hinton2015distilling,
  author    = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff},
  title     = {Distilling the knowledge in a neural network},
  booktitle = {NIPS Deep Learning Workshop},
  year      = {2015},
}

@inproceedings{chen2015compressing,
  author    = {Chen, Wenlin and Wilson, James T and Tyree, Stephen and Weinberger, Kilian Q and Chen, Yixin},
  title     = {Compressing Neural Networks with the Hashing Trick},
  booktitle = {ICML},
  year      = {2015},
}

@inproceedings{gupta2015deep,
  author    = {Gupta, Suyog and Agrawal, Ankur and Gopalakrishnan, Kailash and Narayanan, Pritish},
  title     = {Deep Learning with Limited Numerical Precision},
  booktitle = {ICML},
  year      = {2015},
}

@inproceedings{courbariaux2015low,
  author    = {Courbariaux, Matthieu and Bengio, Yoshua and David, Jean-Pierre},
  title     = {Low precision arithmetic for deep learning},
  booktitle = {ICLR workshop},
  year      = {2015},
}

@inproceedings{lu2016learning,
  author    = {Lu, Zhiyun and Sindhwani, Vikas and Sainath, Tara N},
  title     = {Learning Compact Recurrent Neural Networks},
  booktitle = {ICASSP},
  year      = {2016},
}

@inproceedings{jaderberg2014speeding,
  author    = {Jaderberg, Max and Vedaldi, Andrea and Zisserman, Andrew},
  title     = {Speeding up convolutional neural networks with low rank expansions},
  booktitle = {NIPS},
  year      = {2014},
}

@inproceedings{denton2014exploiting,
  author    = {Denton, Emily L and Zaremba, Wojciech and Bruna, Joan and LeCun, Yann and Fergus, Rob},
  title     = {Exploiting linear structure within convolutional networks for efficient evaluation},
  booktitle = {NIPS},
  year      = {2014},
}

@article{iandola2016squeezenet,
  author  = {Iandola, Forrest N and Moskewicz, Matthew W and Ashraf, Khalid and Han, Song and Dally, William J and Keutzer, Kurt},
  title   = {{SqueezeNet}: {AlexNet}-level accuracy with 50x fewer parameters and $<$ 1{MB} model size},
  journal = {arXiv preprint arXiv:1602.07360},
  year    = {2016},
}

@inproceedings{prabhavalkar2016compression,
  author    = {Prabhavalkar, Rohit and Alsharif, Ouais and Bruguier, Antoine and McGraw, Ian},
  title     = {On the compression of recurrent neural networks with an application to {LVCSR} acoustic modeling for embedded speech recognition},
  booktitle = {ICASSP},
  year      = {2016},
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Vector-space Models %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%
@inproceedings{Dumais:1988:LSA,
  author    = {Dumais, S. T. and Furnas, G. W. and Landauer, T. K. and Deerwester, S. and Harshman, R.},
  title     = {Using Latent Semantic Analysis to Improve Access to Textual Information},
  booktitle = {CHI},
  year      = {1988},
}

@phdthesis{sahlgren06,
  author = {Sahlgren, Magnus},
  title  = {The Word-Space Model: Using Distributional Analysis to Represent Syntagmatic and Paradigmatic Relations between Words in High-Dimensional Vector Spaces},
  school = {Stockholm University},
  year   = {2006},
}

@article{Kim:2004:LTL,
  author  = {Kim, Woosung and Khudanpur, Sanjeev},
  title   = {Lexical Triggers and Latent Semantic Analysis for Cross-lingual Language Model Adaptation},
  journal = {ACM Transactions on Asian Language Information Processing},
  volume  = {3},
  number  = {2},
  pages   = {94--112},
  month   = jun,
  year    = {2004},
}

@inproceedings{Mimno:2009:PTM,
  author    = {Mimno, David and Wallach, Hanna M. and Naradowsky, Jason and Smith, David A. and McCallum, Andrew},
  title     = {Polylingual Topic Models},
  booktitle = {EMNLP},
  pages     = {880--889},
  year      = {2009},
}

@article{Turney:2010:FMV,
  author  = {Turney, Peter D. and Pantel, Patrick},
  title   = {From Frequency to Meaning: Vector Space Models of Semantics},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {37},
  number  = {1},
  pages   = {141--188},
  year    = {2010},
}

@inproceedings{vulic2011identifying,
  author    = {Vuli{\'c}, Ivan and De Smet, Wim and Moens, Marie-Francine},
  title     = {Identifying word translations from comparable corpora using latent topic models},
  booktitle = {ACL-HLT},
  year      = {2011},
}

@inproceedings{Rapp99,
  author    = {Rapp, Reinhard},
  title     = {Automatic Identification of Word Translations from Unrelated {English} and {German} Corpora},
  booktitle = {ACL},
  year      = {1999},
}

@inproceedings{gaussier-EtAl:2004:ACL,
  author    = {Gaussier, Eric and Renders, Jean-Michel and Matveeva, Irina and Goutte, Cyril and D{\'e}jean, Herv{\'e}},
  title     = {A Geometric View on Bilingual Lexicon Extraction from Comparable Corpora},
  booktitle = {ACL},
  year      = {2004},
}

@inproceedings{Fung04mining,
  author    = {Pascale Fung and Percy Cheung},
  title     = {Mining Very Non-Parallel Corpora: Parallel Sentence and Lexicon Extraction via Bootstrapping and {EM}},
  booktitle = {EMNLP},
  year      = {2004},
}

@article{tackstrom-etal-2013,
  author  = {Oscar T{\"a}ckstr{\"o}m and Dipanjan Das and Slav Petrov and Ryan McDonald and Joakim Nivre},
  title   = {Token and Type Constraints for Cross-Lingual Part-of-Speech Tagging},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {1},
  pages   = {1--12},
  year    = {2013},
}

@article{Pado:2009:crossSRL,
  author  = {Pad\'{o}, Sebastian and Lapata, Mirella},
  title   = {Cross-lingual Annotation Projection of Semantic Roles},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {36},
  number  = {1},
  pages   = {307--340},
  year    = {2009},
}

@article{LevowOR05:crossIR,
  author  = {Levow, Gina-Anne and Oard, Douglas W. and Resnik, Philip},
  title   = {Dictionary-based techniques for cross-language information retrieval},
  journal = {Information Processing and Management},
  volume  = {41},
  number  = {3},
  pages   = {523--547},
  year    = {2005},
}

@inproceedings{durrett-pauls-klein:2012:EMNLP-CoNLL,
  author    = {Durrett, Greg and Pauls, Adam and Klein, Dan},
  title     = {Syntactic Transfer Using a Bilingual Lexicon},
  booktitle = {EMNLP-CoNLL},
  year      = {2012},
}

@inproceedings{vulic-moens:2013:EMNLP,
  author    = {Vuli\'{c}, Ivan and Moens, Marie-Francine},
  title     = {A Study on Bootstrapping Bilingual Vector Spaces from Non-Parallel Data (and Nothing Else)},
  booktitle = {EMNLP},
  year      = {2013},
}

@inproceedings{Garera:2009:ITL,
  author    = {Garera, Nikesh and Callison-Burch, Chris and Yarowsky, David},
  title     = {Improving Translation Lexicon Induction from Monolingual Corpora via Dependency Contexts and Part-of-speech Equivalences},
  booktitle = {CoNLL},
  year      = {2009},
}

@inproceedings{Boyd-Graber:2009:MTM,
  author    = {Boyd-Graber, Jordan and Blei, David M.},
  title     = {Multilingual Topic Models for Unaligned Text},
  booktitle = {UAI},
  year      = {2009},
}

@inproceedings{Platt:2010:TDR,
  author    = {Platt, John C. and Toutanova, Kristina and Yih, Wen-tau},
  title     = {Translingual Document Representations from Discriminative Projections},
  booktitle = {EMNLP},
  year      = {2010},
}

@inproceedings{HaghighiLBK08,
  author    = {Haghighi, Aria and Liang, Percy and Berg-Kirkpatrick, Taylor and Klein, Dan},
  title     = {Learning Bilingual Lexicons from Monolingual Corpora},
  booktitle = {ACL},
  year      = {2008},
}

@inproceedings{Sumita:2000:lexical,
  author    = {Sumita, Eiichiro},
  title     = {Lexical transfer using a vector-space model},
  booktitle = {ACL},
  year      = {2000},
}

@inproceedings{Peirsman:2010:CIS,
  author    = {Peirsman, Yves and Pad\'{o}, Sebastian},
  title     = {Cross-lingual Induction of Selectional Preferences with Bilingual Vector Spaces},
  booktitle = {NAACL-HLT},
  year      = {2010},
}

@inproceedings{Boyd-Graber:2010:HSA,
  author    = {Boyd-Graber, Jordan and Resnik, Philip},
  title     = {Holistic Sentiment Analysis Across Languages: Multilingual Supervised Latent {Dirichlet} Allocation},
  booktitle = {EMNLP},
  year      = {2010},
}

@inproceedings{Ruiz:2011:TAL,
  author    = {Ruiz, Nick and Federico, Marcello},
  title     = {Topic Adaptation for Lecture Translation Through Bilingual Latent Semantic Models},
  booktitle = {WMT},
  year      = {2011},
}

@inproceedings{Tam07biLSA-spoken,
  author    = {Tam, Yik-Cheung and Schultz, Tanja},
  title     = {Bilingual {LSA}-based translation lexicon adaptation for spoken language translation},
  booktitle = {Interspeech},
  year      = {2007},
}

@inproceedings{ZhaoXing2007,
  author    = {Zhao, Bing and Xing, Eric P.},
  title     = {{HM-BiTAM}: Bilingual Topic Exploration, Word Alignment, and Translation},
  booktitle = {NIPS},
  year      = {2007},
}

@inproceedings{Zhao06bitam:bilingual,
  author    = {Bing Zhao and Eric P. Xing},
  title     = {{BiTAM}: Bilingual topic admixture models for word alignment},
  booktitle = {ACL},
  year      = {2006},
}

@inproceedings{Yarowsky:2001:IMP,
  author    = {Yarowsky, David and Ngai, Grace},
  title     = {Inducing Multilingual {POS} Taggers and {NP} Bracketers via Robust Projection Across Aligned Corpora},
  booktitle = {NAACL},
  year      = {2001},
}

@inproceedings{green-EtAl:2011:Entity,
  author    = {Spence Green and Nicholas Andrews and Matthew R. Gormley and Mark Dredze and Christopher D. Manning},
  title     = {Entity Clustering Across Languages},
  booktitle = {NAACL},
  month     = jun,
  year      = {2012},
}

@inproceedings{wang-che-manning:2013:ACL2013,
  author    = {Wang, Mengqiu and Che, Wanxiang and Manning, Christopher D.},
  title     = {Joint Word Alignment and Bilingual Named Entity Recognition Using Dual Decomposition},
  booktitle = {ACL},
  year      = {2013},
}

@inproceedings{jin12ws353zh,
  author    = {Jin, Peng and Wu, Yunfang},
  title     = {{SemEval-2012} Task 4: Evaluating {Chinese} Word Similarity},
  booktitle = {SemEval},
  year      = {2012},
}

@article{miller-91,
  author  = {Miller, George A. and Charles, Walter G.},
  title   = {Contextual correlates of semantic similarity},
  journal = {Language and Cognitive Processes},
  volume  = {6},
  number  = {1},
  pages   = {1--28},
  year    = {1991},
}

@article{FinkelsteinGMRSWR02ws353,
  author  = {Finkelstein, Lev and Gabrilovich, Evgeniy and Matias, Yossi and Rivlin, Ehud and Solan, Zach and Wolfman, Gadi and Ruppin, Eytan},
  title   = {Placing search in context: the concept revisited},
  journal = {ACM Transactions on Information Systems},
  volume  = {20},
  number  = {1},
  pages   = {116--131},
  year    = {2002},
}

@article{Rubenstein:1965:CCS,
  author  = {Rubenstein, Herbert and Goodenough, John B.},
  title   = {Contextual correlates of synonymy},
  journal = {Communications of the ACM},
  volume  = {8},
  number  = {10},
  pages   = {627--633},
  month   = oct,
  year    = {1965},
}

% NOTE(review): recorded as a manuscript because no journal publication
% could be confirmed for this paper -- verify before submission.
@unpublished{Rohde06animproved,
  author = {Douglas L. T. Rohde and Laura M. Gonnerman and David C. Plaut},
  title  = {An improved model of semantic similarity based on lexical co-occurrence},
  note   = {Unpublished manuscript},
  year   = {2006},
}

@article{Miller1995,
  author  = {Miller, George A.},
  title   = {{WordNet}: A Lexical Database for {English}},
  journal = {Communications of the ACM},
  volume  = {38},
  number  = {11},
  pages   = {39--41},
  year    = {1995},
}

@article{nelson2005wha,
  author  = {Nelson, D. L. and Dyrdal, G. M. and Goodmon, L. B.},
  title   = {What is preexisting strength? {Predicting} free association probabilities, similarity ratings, and cued recall probabilities},
  journal = {Psychonomic Bulletin \& Review},
  volume  = {12},
  number  = {4},
  pages   = {711--719},
  year    = {2005},
}

@inproceedings{HuangEtAl2012,
  author    = {Huang, Eric H. and Socher, Richard and Manning, Christopher D. and Ng, Andrew Y.},
  title     = {Improving Word Representations via Global Context and Multiple Word Prototypes},
  booktitle = {ACL},
  year      = {2012},
}

@inproceedings{reisinger:naacl10,
  author    = {Reisinger, Joseph and Mooney, Raymond J.},
  title     = {Multi-Prototype Vector-Space Models of Word Meaning},
  booktitle = {NAACL},
  year      = {2010},
}

@misc{shaoul10westbury,
  author       = {Shaoul, Cyrus and Westbury, Chris},
  title        = {The {Westbury} Lab {Wikipedia} Corpus},
  howpublished = {Edmonton, AB: University of Alberta},
  year         = {2010},
}


%%% Uncategorized %%%
@article{jan14,
  author  = {Jan Chorowski and Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
  title   = {End-to-end Continuous Speech Recognition using Attention-based Recurrent {NN:} First Results},
  journal = {CoRR},
  volume  = {abs/1412.1602},
  year    = {2014},
}

@article{fraser07,
  author  = {Fraser, Alexander and Marcu, Daniel},
  title   = {Measuring Word Alignment Quality for Statistical Machine Translation},
  journal = {Computational Linguistics},
  volume  = {33},
  number  = {3},
  pages   = {293--303},
  year    = {2007},
}


@inproceedings{freitag14,
  author    = {Freitag, Markus and Peitz, Stephan and Wuebker, Joern and Ney, Hermann and Huck, Matthias and Sennrich, Rico and Durrani, Nadir and Nadejde, Maria and Williams, Philip and Koehn, Philipp and Herrmann, Teresa and Cho, Eunah and Waibel, Alex},
  title     = {{EU-BRIDGE} {MT}: Combined Machine Translation},
  booktitle = {WMT},
  year      = {2014},
}

@inproceedings{koehn2002learning,
  author    = {Koehn, Philipp and Knight, Kevin},
  title     = {Learning a translation lexicon from monolingual corpora},
  booktitle = {ACL},
  year      = {2002},
}

@inproceedings{Axelrod:2011:DAV,
  author    = {Axelrod, Amittai and He, Xiaodong and Gao, Jianfeng},
  title     = {Domain Adaptation via Pseudo In-domain Data Selection},
  booktitle = {EMNLP},
  year      = {2011},
}


@inproceedings{dyer13fastalign,
  author    = {Dyer, Chris and Chahuneau, Victor and Smith, Noah A.},
  title     = {A Simple, Fast, and Effective Reparameterization of {IBM} Model 2},
  booktitle = {NAACL-HLT},
  year      = {2013},
}

% The suffix III sits inside the surname part (before the single comma),
% so the name parses as Last = Daume III, First = Hal.
@inproceedings{Daume:2011:DAM,
  author    = {Daum{\'e} III, Hal and Jagarlamudi, Jagadeesh},
  title     = {Domain Adaptation for Machine Translation by Mining Unseen Words},
  booktitle = {ACL-HLT},
  year      = {2011},
}

@inproceedings{Ni09multi,
  author    = {Ni, Xiaochuan and Sun, Jian-Tao and Hu, Jian and Chen, Zheng},
  title     = {Mining multilingual topics from {Wikipedia}},
  booktitle = {WWW},
  year      = {2009},
}

@inproceedings{alexandrescu2006factoredNLM,
  author    = {Alexandrescu, Andrei and Kirchhoff, Katrin},
  title     = {Factored Neural Language Models},
  booktitle = {NAACL},
  year      = {2006},
  url       = {http://www.aclweb.org/anthology/N/N06/N06-2001},
}

@article{Creutz:2007,
  author  = {Creutz, Mathias and Lagus, Krista},
  title   = {Unsupervised models for morpheme segmentation and morphology learning},
  journal = {ACM Transactions on Speech and Language Processing},
  volume  = {4},
  number  = {1},
  pages   = {3:1--3:34},
  year    = {2007},
}

@inproceedings{koo:08,
  author    = {Koo, Terry and Carreras, Xavier and Collins, Michael},
  title     = {Simple semi-supervised dependency parsing},
  booktitle = {ACL},
  year      = {2008},
}

@inproceedings{Ratinov09,
  author    = {Ratinov, Lev and Roth, Dan},
  title     = {Design Challenges and Misconceptions in Named Entity Recognition},
  booktitle = {CoNLL},
  year      = {2009},
}

@inproceedings{Clark03,
  author    = {Clark, Alexander},
  title     = {Combining distributional and morphological information for part of speech induction},
  booktitle = {EACL},
  year      = {2003},
}

@article{plunkett91ushaped,
  author   = {Plunkett, K. and Marchman, V.},
  title    = {U-shaped learning and frequency effects in a multi-layered perceptron: implications for child language acquisition},
  journal  = {Cognition},
  volume   = {38},
  number   = {1},
  pages    = {43--102},
  year     = {1991},
  priority = {3},
}

@inproceedings{GasserL90,
  author    = {Gasser, Michael and Lee, Chan-Do},
  title     = {A Short-Term Memory Architecture for the Learning of Morphophonemic Rules},
  booktitle = {NIPS},
  year      = {1990},
}

@inproceedings{Gasser94,
  author    = {Gasser, Michael},
  title     = {Acquiring Receptive Morphology: A Connectionist Model},
  booktitle = {ACL},
  year      = {1994},
}

@inproceedings{Angeliki13compositional,
  author    = {Lazaridou, Angeliki and Marelli, Marco and Zamparelli, Roberto and Baroni, Marco},
  title     = {Compositional-ly Derived Representations of Morphologically Complex Words in Distributional Semantics},
  booktitle = {ACL},
  year      = {2013},
}