@article{cauchy1847methode,
  author  = {Cauchy, Augustin},
  title   = {M{\'e}thode g{\'e}n{\'e}rale pour la r{\'e}solution des syst{\`e}mes d'{\'e}quations simultan{\'e}es},
  journal = {Comptes Rendus de l'Acad{\'e}mie des Sciences},
  volume  = {25},
  pages   = {536--538},
  year    = {1847},
}
% NOTE(review): `year` is 1932 while the key, booktitle and pages point at the
% collected-papers reprint; presumably the original publication year is
% intended for citation labels. Confirm before changing.
@incollection{whitney1992congruent,
  author    = {Whitney, Hassler},
  title     = {Congruent graphs and the connectivity of graphs},
  booktitle = {Hassler Whitney Collected Papers},
  pages     = {61--79},
  publisher = {Birkh{\"a}user},
  year      = {1932},
}
@article{thompson1933likelihood,
  author    = {Thompson, William R},
  title     = {On the likelihood that one unknown probability exceeds another in view of the evidence of two samples},
  journal   = {Biometrika},
  volume    = {25},
  number    = {3--4},
  pages     = {285--294},
  publisher = {Oxford University Press},
  year      = {1933},
}
@article{thompson1935theory,
  author    = {Thompson, William R},
  title     = {On the theory of apportionment},
  journal   = {American Journal of Mathematics},
  volume    = {57},
  number    = {2},
  pages     = {450--456},
  publisher = {Johns Hopkins University Press},
  year      = {1935},
}
@article{mcculloch1943logical,
  author    = {McCulloch, Warren S and Pitts, Walter},
  title     = {A logical calculus of the ideas immanent in nervous activity},
  journal   = {The Bulletin of Mathematical Biophysics},
  volume    = {5},
  number    = {4},
  pages     = {115--133},
  publisher = {Springer},
  year      = {1943},
}
@article{robbins1951stochastic,
  author    = {Robbins, Herbert and Monro, Sutton},
  title     = {A Stochastic Approximation Method},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {22},
  number    = {3},
  pages     = {400--407},
  publisher = {Institute of Mathematical Statistics},
  year      = {1951},
  issn      = {0003-4851},
  abstract  = {Let $M(x)$ denote the expected value at level $x$ of the response to a certain experiment. $M(x)$ is assumed to be a monotone function of $x$ but is unknown to the experimenter, and it is desired to find the solution $x = \theta$ of the equation $M(x) = \alpha$, where $\alpha$ is a given constant. We give a method for making successive experiments at levels $x_1, x_2, \cdots$ in such a way that $x_n$ will tend to $\theta$ in probability.},
}
@article{rosenblatt1958perceptron,
  author    = {Rosenblatt, Frank},
  title     = {The perceptron: {A} probabilistic model for information storage and organization in the brain},
  journal   = {Psychological Review},
  volume    = {65},
  number    = {6},
  pages     = {386--408},
  publisher = {American Psychological Association},
  year      = {1958},
}
@book{howard1960dynamic,
  author    = {Howard, Ronald A},
  title     = {Dynamic programming and {Markov} processes},
  publisher = {Wiley},
  year      = {1960},
}
@article{POLYAK19641some,
  author    = {Polyak, Boris T},
  title     = {Some methods of speeding up the convergence of iteration methods},
  journal   = {USSR Computational Mathematics and Mathematical Physics},
  volume    = {4},
  number    = {5},
  pages     = {1--17},
  publisher = {Elsevier},
  year      = {1964},
}
@article{bellman1966dynamic,
  author    = {Bellman, Richard},
  title     = {Dynamic programming},
  journal   = {Science},
  volume    = {153},
  number    = {3731},
  pages     = {34--37},
  publisher = {American Association for the Advancement of Science},
  year      = {1966},
}
@article{weisfeiler1968reduction,
  author  = {Weisfeiler, Boris and Leman, Andrei},
  title   = {The reduction of a graph to canonical form and the algebra which appears therein},
  journal = {Nauchno-Technicheskaya Informatsia, Series 2},
  number  = {9},
  pages   = {12--16},
  year    = {1968},
}
@book{minsky1969perceptrons,
  author    = {Minsky, Marvin and Papert, Seymour A},
  title     = {Perceptrons: An introduction to computational geometry},
  publisher = {MIT Press},
  year      = {1969},
}
@article{fukushima1969visual,
  author  = {Fukushima, Kunihiko},
  title   = {Visual Feature Extraction by a Multilayered Network of Analog Threshold Elements},
  journal = {IEEE Transactions on Systems Science and Cybernetics},
  volume  = {5},
  number  = {4},
  pages   = {322--333},
  year    = {1969},
  doi     = {10.1109/TSSC.1969.300225},
}
@incollection{andreae1969learning,
  author    = {Andreae, John},
  title     = {Learning Machines: A Unified View},
  booktitle = {Encyclopaedia of Linguistics, Information and Control},
  pages     = {261--270},
  publisher = {Pergamon Press},
  month     = jan,
  year      = {1969},
}
% NOTE(review): `year` is 1971 while the key says 2015 and the booktitle looks
% like a later reprint volume; presumably the original publication year is
% intended for citation labels. Confirm before changing.
@incollection{vapnik2015uniform,
  author    = {Vapnik, Vladimir N and Chervonenkis, A Ya},
  title     = {On the uniform convergence of relative frequencies of events to their probabilities},
  booktitle = {Measures of Complexity},
  pages     = {11--30},
  publisher = {Springer},
  year      = {1971},
}
@book{Rawls-1971,
  author    = {John Rawls},
  title     = {A Theory of Justice},
  publisher = {Belknap Press},
  year      = {1971},
}
@phdthesis{werbos1974beyond,
  author = {Werbos, Paul},
  title  = {Beyond regression: New tools for prediction and analysis in the behavioral sciences},
  school = {Harvard University},
  year   = {1974},
}
@book{Braverman-1974,
  author    = {Harry Braverman},
  title     = {Labor and monopoly capital: the degradation of work in the twentieth century},
  publisher = {Monthly Review Press},
  year      = {1974},
}
@book{zaslavsky1975facing,
  author    = {Zaslavsky, Thomas},
  title     = {Facing up to arrangements: Face-count formulas for partitions of space by hyperplanes},
  series    = {Memoirs of the American Mathematical Society},
  number    = {154},
  publisher = {American Mathematical Society},
  year      = {1975},
}
@article{dempster1977maximum,
  author    = {Dempster, Arthur P and Laird, Nan M and Rubin, Donald B},
  title     = {Maximum likelihood from incomplete data via the {EM} algorithm},
  journal   = {Journal of the Royal Statistical Society: Series B},
  volume    = {39},
  number    = {1},
  pages     = {1--22},
  publisher = {Wiley Online Library},
  year      = {1977},
}
% NOTE(review): the DOI and the two-page range suggest a short journal item
% (possibly a review of the book of the same title) -- confirm the author list.
@article{bryson1979applied,
  author  = {Bryson, Arthur and Ho, Y.-C. and Siouris, George},
  title   = {Applied Optimal Control: Optimization, Estimation, and Control},
  journal = {IEEE Transactions on Systems, Man, and Cybernetics},
  volume  = {9},
  pages   = {366--367},
  month   = jul,
  year    = {1979},
  doi     = {10.1109/TSMC.1979.4310229},
}
@incollection{fukushima1982neocognitron,
  author    = {Fukushima, Kunihiko and Miyake, Sei},
  title     = {Neocognitron: A self-organizing neural network model for a mechanism of visual pattern recognition},
  booktitle = {Competition and Cooperation in Neural Nets},
  pages     = {267--285},
  publisher = {Springer},
  year      = {1982},
}
@article{rubin1982algorithms,
  author    = {Rubin, Donald B and Thayer, Dorothy T},
  title     = {{EM} algorithms for {ML} factor analysis},
  journal   = {Psychometrika},
  volume    = {47},
  number    = {1},
  pages     = {69--76},
  publisher = {Springer},
  year      = {1982},
}
@article{nesterov1983method,
  author  = {Nesterov, Yurii E.},
  title   = {A method for solving the convex programming problem with convergence rate {$O(1/k^2)$}},
  journal = {Doklady Akademii Nauk SSSR},
  volume  = {269},
  number  = {3},
  pages   = {543--547},
  year    = {1983},
}
@article{rissanen1983universal,
  author    = {Rissanen, Jorma},
  title     = {A universal prior for integers and estimation by minimum description length},
  journal   = {The Annals of Statistics},
  volume    = {11},
  number    = {2},
  pages     = {416--431},
  publisher = {Institute of Mathematical Statistics},
  year      = {1983},
}
@phdthesis{sutton1984temporal,
  author = {Sutton, Richard Stuart},
  title  = {Temporal credit assignment in reinforcement learning},
  school = {University of Massachusetts Amherst},
  year   = {1984},
}
@inproceedings{lecun1985une,
  author    = {LeCun, Yann},
  title     = {Une proc{\'e}dure d'apprentissage pour r{\'e}seau {\`a} seuil asym{\'e}trique},
  booktitle = {Proceedings of Cognitiva},
  pages     = {599--604},
  year      = {1985},
  language  = {French},
}
@techreport{parker1985learning,
  author      = {Parker, David B},
  title       = {Learning-logic: Casting the cortex of the human brain in silicon},
  institution = {Alfred P. Sloan School of Management, MIT},
  year        = {1985},
}
@techreport{rumelhart1985learning,
  author      = {Rumelhart, David E and Hinton, Geoffrey E and Williams, Ronald J},
  title       = {Learning internal representations by error propagation},
  institution = {Institute for Cognitive Science, University of California, San Diego},
  address     = {La Jolla, CA},
  year        = {1985},
}
@inproceedings{dechter1986learning,
  author    = {Dechter, Rina},
  title     = {Learning While Searching in Constraint-Satisfaction-Problems},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {178--183},
  year      = {1986},
}
@article{rumelhart1986learning,
  author    = {Rumelhart, David E and Hinton, Geoffrey E and Williams, Ronald J},
  title     = {Learning representations by back-propagating errors},
  journal   = {Nature},
  volume    = {323},
  number    = {6088},
  pages     = {533--536},
  publisher = {Nature Publishing Group},
  year      = {1986},
}
@book{Harding-1986,
  author    = {Sandra Harding},
  title     = {The Science Question in Feminism},
  publisher = {Cornell University Press},
  year      = {1986},
}
@article{murty1985some,
  author  = {Katta G. Murty and Santosh N. Kabadi},
  title   = {Some {NP}-complete problems in quadratic and nonlinear programming},
  journal = {Mathematical Programming},
  volume  = {39},
  number  = {2},
  pages   = {117--129},
  year    = {1987},
}
@incollection{Goldberg-1987,
  author    = {David E. Goldberg},
  title     = {Simple Genetic Algorithms and the Minimal Deceptive Problem},
  booktitle = {Genetic Algorithms and Simulated Annealing},
  pages     = {74--88},
  publisher = {Morgan Kaufmann},
  year      = {1987},
}
@inproceedings{hanson1988comparing,
  author    = {Hanson, Stephen Jos{\'e} and Pratt, Lorien Y.},
  title     = {Comparing Biases for Minimal Network Construction with Back-Propagation},
  booktitle = {Neural Information Processing Systems},
  volume    = {2},
  pages     = {177--185},
  year      = {1988},
}
@inproceedings{zhou1988computation,
  author    = {Zhou, Yi-Tong and Chellappa, Rama},
  title     = {Computation of optical flow using a neural network},
  booktitle = {IEEE International Conference on Neural Networks},
  pages     = {71--78},
  year      = {1988},
}
@article{sutton1988learning,
  author    = {Sutton, Richard S},
  title     = {Learning to predict by the methods of temporal differences},
  journal   = {Machine learning},
  volume    = {3},
  number    = {1},
  pages     = {9--44},
  publisher = {Springer},
  year      = {1988},
}
@article{cybenko1989approximation,
  author    = {Cybenko, George},
  title     = {Approximation by superpositions of a sigmoidal function},
  journal   = {Mathematics of Control, Signals and Systems},
  volume    = {2},
  number    = {4},
  pages     = {303--314},
  publisher = {Springer},
  year      = {1989},
}
@inproceedings{lecun1989handwritten,
  author    = {LeCun, Yann and Boser, Bernhard and Denker, John and Henderson, Donnie and Howard, Richard and Hubbard, Wayne and Jackel, Lawrence},
  title     = {Handwritten digit recognition with a back-propagation network},
  booktitle = {Neural Information Processing Systems},
  volume    = {2},
  pages     = {396--404},
  year      = {1989},
}
@article{lecun1989backpropagation,
  author  = {LeCun, Yann and Boser, Bernhard and Denker, John S and Henderson, Donnie and Howard, Richard E and Hubbard, Wayne and Jackel, Lawrence D},
  title   = {Backpropagation applied to handwritten zip code recognition},
  journal = {Neural Computation},
  volume  = {1},
  number  = {4},
  pages   = {541--551},
  year    = {1989},
}
@article{waibel1989phoneme,
  author  = {Waibel, Alex and Hanazawa, Toshiyuki and Hinton, Geoffrey and Shikano, Kiyohiro and Lang, Kevin J},
  title   = {Phoneme recognition using time-delay neural networks},
  journal = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
  volume  = {37},
  number  = {3},
  pages   = {328--339},
  year    = {1989},
}
@article{hutchinson1989stochastic,
  author    = {Hutchinson, Michael F},
  title     = {A stochastic estimator of the trace of the influence matrix for {L}aplacian smoothing splines},
  journal   = {Communications in Statistics-Simulation and Computation},
  volume    = {18},
  number    = {3},
  pages     = {1059--1076},
  publisher = {Taylor \& Francis},
  year      = {1989},
}
@phdthesis{watkins1989learning,
  author = {Watkins, Christopher John Cornish Hellaby},
  title  = {Learning from delayed rewards},
  school = {University of Cambridge},
  year   = {1989},
}
@article{baldi1989neural,
  author    = {Baldi, Pierre and Hornik, Kurt},
  title     = {Neural networks and principal component analysis: Learning from examples without local minima},
  journal   = {Neural Networks},
  volume    = {2},
  number    = {1},
  pages     = {53--58},
  publisher = {Elsevier},
  year      = {1989},
}
@article{bottou1990speaker,
  author    = {Bottou, L{\'e}on and Fogelman Souli{\'e}, Fran{\c{c}}oise and Blanchet, Pascal and Li{\'e}nard, Jean-Sylvain},
  title     = {Speaker-independent isolated digit recognition: Multilayer perceptrons vs. dynamic time warping},
  journal   = {Neural Networks},
  volume    = {3},
  number    = {4},
  pages     = {453--465},
  publisher = {Elsevier},
  year      = {1990},
}
@inproceedings{lecun1990optimal,
  author    = {LeCun, Yann and Denker, John S and Solla, Sara A},
  title     = {Optimal brain damage},
  booktitle = {Neural Information Processing Systems},
  volume    = {3},
  pages     = {598--605},
  year      = {1990},
}
@book{Longino-1990,
  author    = {Helen E. Longino},
  title     = {Science as Social Knowledge: Values and Objectivity in Scientific Inquiry},
  publisher = {Princeton University Press},
  year      = {1990},
}
@article{hornik1991approximation,
  author  = {Hornik, Kurt},
  title   = {Approximation capabilities of multilayer feedforward networks},
  journal = {Neural Networks},
  volume  = {4},
  number  = {2},
  pages   = {251--257},
  year    = {1991},
  doi     = {10.1016/0893-6080(91)90009-T},
  issn    = {0893-6080},
}
@article{jacobs1991adaptive,
  author    = {Jacobs, Robert A and Jordan, Michael I and Nowlan, Steven J and Hinton, Geoffrey E},
  title     = {Adaptive mixtures of local experts},
  journal   = {Neural Computation},
  volume    = {3},
  number    = {1},
  pages     = {79--87},
  publisher = {MIT Press},
  year      = {1991},
}
@article{Crenshaw-1991,
  author  = {Kimberle Crenshaw},
  title   = {Mapping the Margins: Intersectionality, Identity Politics, and Violence Against Women of Color},
  journal = {Stanford Law Review},
  volume  = {43},
  number  = {6},
  pages   = {1241--1299},
  year    = {1991},
}
@article{wolpert1992stacked,
  author    = {Wolpert, David H},
  title     = {Stacked generalization},
  journal   = {Neural Networks},
  volume    = {5},
  number    = {2},
  pages     = {241--259},
  publisher = {Elsevier},
  year      = {1992},
}
@article{watkins1992q,
  author    = {Watkins, Christopher J. C. H. and Dayan, Peter},
  title     = {{Q}-learning},
  journal   = {Machine learning},
  volume    = {8},
  number    = {3--4},
  pages     = {279--292},
  publisher = {Springer},
  year      = {1992},
}
@article{lin1992self,
  author  = {Lin, Long-Ji},
  title   = {Self-improving reactive agents based on reinforcement learning, planning and teaching},
  journal = {Machine learning},
  volume  = {8},
  number  = {3--4},
  pages   = {293--321},
  year    = {1992},
}
@article{williams1992simple,
  author    = {Williams, Ronald J},
  title     = {Simple statistical gradient-following algorithms for connectionist reinforcement learning},
  journal   = {Machine learning},
  volume    = {8},
  number    = {3},
  pages     = {229--256},
  publisher = {Springer},
  year      = {1992},
}
@article{blum1992training,
  author    = {Blum, Avrim L and Rivest, Ronald L},
  title     = {Training a 3-node neural network is {NP}-complete},
  journal   = {Neural Networks},
  volume    = {5},
  number    = {1},
  pages     = {117--127},
  publisher = {Elsevier},
  year      = {1992},
}
@inproceedings{Mitchell-et-al-1992,
  author    = {Melanie Mitchell and Stephanie Forrest and John H. Holland},
  title     = {The Royal Road for Genetic Algorithms: Fitness Landscapes and {GA} Performance},
  booktitle = {European Conference on Artificial Life},
  year      = {1992},
}
@inproceedings{hinton93keeping,
  author    = {Hinton, Geoffrey E and van Camp, Drew},
  title     = {Keeping neural networks simple by minimising the description length of weights},
  booktitle = {Annual Conference on Computational Learning Theory},
  pages     = {5--13},
  year      = {1993},
}
@article{martin1993centered,
  author    = {Martin, Gale L},
  title     = {Centered-object integrated segmentation and recognition of overlapping handprinted characters},
  journal   = {Neural Computation},
  volume    = {5},
  number    = {3},
  pages     = {419--429},
  publisher = {MIT Press},
  year      = {1993},
}
@inproceedings{bromley1993signature,
  author    = {Bromley, Jane and Guyon, Isabelle and LeCun, Yann and S{\"a}ckinger, Eduard and Shah, Roopak},
  title     = {Signature verification using a ``{S}iamese'' time delay neural network},
  booktitle = {Neural Information Processing Systems},
  volume    = {6},
  pages     = {737--744},
  year      = {1993},
}
@inproceedings{hassibi1993second,
  author    = {Hassibi, Babak and Stork, David G},
  title     = {Second order derivatives for network pruning: Optimal brain surgeon},
  booktitle = {Neural Information Processing Systems},
  volume    = {6},
  pages     = {164--171},
  year      = {1993},
}
@inproceedings{nix1994estimating,
  author    = {Nix, David A and Weigend, Andreas S},
  title     = {Estimating the mean and variance of the target probability distribution},
  booktitle = {IEEE International Conference on Neural Networks},
  pages     = {55--60},
  year      = {1994},
}
@techreport{bishop1994mixture,
  author      = {Bishop, Christopher M},
  title       = {Mixture density networks},
  institution = {Aston University},
  year        = {1994},
}
@techreport{rummery1994line,
  author      = {Rummery, Gavin A and Niranjan, Mahesan},
  title       = {On-line {Q}-learning using connectionist systems},
  institution = {University of Cambridge},
  year        = {1994},
}
@book{vapnik95,
  author    = {Vapnik, Vladimir N},
  title     = {The nature of statistical learning theory},
  publisher = {Springer-Verlag},
  address   = {New York},
  year      = {1995},
}
@inproceedings{bishop1995regularization,
  author    = {Bishop, Christopher},
  title     = {Regularization and Complexity Control in Feed-forward Networks},
  booktitle = {International Conference on Artificial Neural Networks},
  pages     = {141--148},
  year      = {1995},
}
@article{sjoberg1995overtraining,
  author    = {Sj{\"o}berg, Jonas and Ljung, Lennart},
  title     = {Overtraining, regularization and searching for a minimum, with application to neural networks},
  journal   = {International Journal of Control},
  volume    = {62},
  number    = {6},
  pages     = {1391--1407},
  publisher = {Taylor \& Francis},
  year      = {1995},
}
@inproceedings{mackay1995ensemble,
  author    = {MacKay, David JC},
  title     = {Ensemble learning and evidence maximization},
  booktitle = {Neural Information Processing Systems},
  volume    = {8},
  pages     = {4083--4090},
  year      = {1995},
}
@book{neal2012bayesian,
  author    = {Neal, Radford M},
  title     = {Bayesian learning for neural networks},
  publisher = {Springer},
  year      = {1995},
}
@inproceedings{gordon1995stable,
  author    = {Gordon, Geoffrey J},
  title     = {Stable fitted reinforcement learning},
  booktitle = {Neural Information Processing Systems},
  volume    = {8},
  pages     = {1052--1058},
  year      = {1995},
}
@incollection{Hofstadter-1995,
  author    = {Douglas R. Hofstadter},
  title     = {The Ineradicable {E}liza Effect and Its Dangers (Preface 4)},
  booktitle = {Fluid Concepts and Creative Analogies: Computer Models Of The Fundamental Mechanisms Of Thought},
  pages     = {155--168},
  publisher = {Basic Books},
  year      = {1995},
}
@article{williams1996using,
  author    = {Williams, Peter M},
  title     = {Using neural networks to model conditional multivariate densities},
  journal   = {Neural Computation},
  volume    = {8},
  number    = {4},
  pages     = {843--854},
  publisher = {MIT Press},
  year      = {1996},
}
@article{an1996effects,
  author    = {An, Guozhong},
  title     = {The effects of adding noise during backpropagation training on a generalization performance},
  journal   = {Neural Computation},
  volume    = {8},
  number    = {3},
  pages     = {643--674},
  publisher = {MIT Press},
  year      = {1996},
}
@article{singh1996reinforcement,
  author    = {Singh, Satinder P and Sutton, Richard S},
  title     = {Reinforcement learning with replacing eligibility traces},
  journal   = {Machine learning},
  volume    = {22},
  number    = {1},
  pages     = {123--158},
  publisher = {Springer},
  year      = {1996},
}
@incollection{Longino-1996,
  author    = {Longino, Helen E.},
  title     = {Cognitive and Non-Cognitive Values in Science: Rethinking the Dichotomy},
  booktitle = {Feminism, Science, and the Philosophy of Science},
  pages     = {39--58},
  publisher = {Kluwer Academic Publishers},
  year      = {1996},
}
@article{freund95,
  author    = {Freund, Yoav and Schapire, Robert E},
  title     = {A decision-theoretic generalization of on-line learning and an application to boosting},
  journal   = {Journal of Computer and System Sciences},
  volume    = {55},
  number    = {1},
  pages     = {119--139},
  publisher = {Elsevier},
  year      = {1997},
}
@article{friedman1997bias,
  author    = {Friedman, Jerome H},
  title     = {On bias, variance, 0/1---loss, and the curse-of-dimensionality},
  journal   = {Data Mining and Knowledge Discovery},
  volume    = {1},
  number    = {1},
  pages     = {55--77},
  publisher = {Springer},
  year      = {1997},
}
@inproceedings{barber1997ensemble,
  author    = {Barber, David and Bishop, Christopher},
  title     = {Ensemble learning for multi-layer networks},
  booktitle = {Neural Information Processing Systems},
  volume    = {10},
  pages     = {395--401},
  year      = {1997},
}
@article{lawrence1997face,
  author  = {Lawrence, Steve and Giles, C Lee and Tsoi, Ah Chung and Back, Andrew D},
  title   = {Face recognition: A convolutional neural-network approach},
  journal = {IEEE Transactions on Neural Networks},
  volume  = {8},
  number  = {1},
  pages   = {98--113},
  year    = {1997},
}
@article{hochreiter1997long,
  author    = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title     = {Long short-term memory},
  journal   = {Neural Computation},
  volume    = {9},
  number    = {8},
  pages     = {1735--1780},
  publisher = {MIT Press},
  year      = {1997},
}
@article{hochreiter1997flat,
  author  = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title   = {Flat minima},
  journal = {Neural Computation},
  volume  = {9},
  number  = {1},
  pages   = {1--42},
  year    = {1997},
}
@article{lecun1998gradient,
  author    = {LeCun, Yann and Bottou, L{\'e}on and Bengio, Yoshua and Haffner, Patrick},
  title     = {Gradient-based learning applied to document recognition},
  journal   = {Proceedings of the IEEE},
  volume    = {86},
  number    = {11},
  pages     = {2278--2324},
  publisher = {IEEE},
  year      = {1998},
}
@article{gales1998maximum,
  author    = {Gales, Mark JF},
  title     = {Maximum likelihood linear transformations for {HMM}-based speech recognition},
  journal   = {Computer Speech \& Language},
  volume    = {12},
  number    = {2},
  pages     = {75--98},
  publisher = {Elsevier},
  year      = {1998},
}
@article{amari1998natural,
  author    = {Amari, Shun-Ichi},
  title     = {Natural gradient works efficiently in learning},
  journal   = {Neural Computation},
  volume    = {10},
  number    = {2},
  pages     = {251--276},
  publisher = {MIT Press},
  year      = {1998},
}
@inproceedings{beyer1999nearest,
  author    = {Beyer, Kevin and Goldstein, Jonathan and Ramakrishnan, Raghu and Shaft, Uri},
  title     = {When is ``nearest neighbor'' meaningful?},
  booktitle = {International Conference on Database Theory},
  pages     = {217--235},
  year      = {1999},
}
@book{manning1999foundations,
  author    = {Manning, Christopher and Sch{\"u}tze, Hinrich},
  title     = {Foundations of statistical natural language processing},
  publisher = {MIT Press},
  year      = {1999},
}
@article{tipping1999probabilistic,
  author    = {Tipping, Michael E and Bishop, Christopher M},
  title     = {Probabilistic principal component analysis},
  journal   = {Journal of the Royal Statistical Society: Series B},
  volume    = {61},
  number    = {3},
  pages     = {611--622},
  publisher = {Wiley Online Library},
  year      = {1999},
}
@book{sutton1999reinforcement,
  author    = {Sutton, Richard S and Barto, Andrew G},
  title     = {Reinforcement learning: An introduction},
  publisher = {MIT Press},
  year      = {1999},
}
@inproceedings{sutton1999policy,
  author    = {Sutton, Richard S and McAllester, David and Singh, Satinder and Mansour, Yishay},
  title     = {Policy gradient methods for reinforcement learning with function approximation},
  booktitle = {Neural Information Processing Systems},
  volume    = {12},
  pages     = {1057--1063},
  year      = {1999},
}
@inproceedings{konda1999actor,
  author    = {Konda, Vijay and Tsitsiklis, John},
  title     = {Actor-critic algorithms},
  booktitle = {Neural Information Processing Systems},
  volume    = {12},
  pages     = {1008--1014},
  year      = {1999},
}
@book{cristianini00,
  author    = {Cristianini, Nello and Shawe-Taylor, John},
  title     = {An Introduction to Support Vector Machines and Other Kernel-Based Learning Methods},
  publisher = {Cambridge University Press},
  year      = {2000},
}
@inproceedings{domingos2000unified,
  author    = {Domingos, Pedro},
  title     = {A unified bias-variance decomposition},
  booktitle = {International Conference on Machine Learning},
  pages     = {231--238},
  year      = {2000},
}
% NOTE(review): year 2000 sits oddly with a second edition and with the 2008
% in the key -- confirm which edition and year are intended.
@book{jurafsky2008speech,
  author    = {Jurafsky, Daniel and Martin, James H},
  title     = {Speech and Language Processing},
  edition   = {Second},
  publisher = {Pearson},
  year      = {2000},
}
@inproceedings{bengio2000neural,
  author    = {Bengio, Yoshua and Ducharme, R{\'e}jean and Vincent, Pascal},
  title     = {A neural probabilistic language model},
  booktitle = {Neural Information Processing Systems},
  volume    = {13},
  pages     = {932--938},
  year      = {2000},
}
@article{koenker2001quantile,
  author  = {Koenker, Roger and Hallock, Kevin F},
  title   = {Quantile regression},
  journal = {Journal of Economic Perspectives},
  volume  = {15},
  number  = {4},
  pages   = {143--156},
  year    = {2001},
}
@inproceedings{aggarwal2001surprising,
  author    = {Aggarwal, Charu C and Hinneburg, Alexander and Keim, Daniel A},
  title     = {On the surprising behavior of distance metrics in high dimensional space},
  booktitle = {International Conference on Database Theory},
  pages     = {420--434},
  year      = {2001},
}
@inproceedings{kakade2001natural,
  author    = {Kakade, Sham M},
  title     = {A natural policy gradient},
  booktitle = {Neural Information Processing Systems},
  volume    = {14},
  pages     = {1531--1538},
  year      = {2001},
}
@article{fernandez2002multihierarchical,
  author    = {Fern{\'a}ndez-Madrigal, J-A and Gonz{\'a}lez, Javier},
  title     = {Multihierarchical graph search},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {24},
  number    = {1},
  pages     = {103--113},
  publisher = {IEEE},
  year      = {2002},
}
@book{Laffont-Martimort-2002,
  author    = {Jean-Jacques Laffont and David Martimort},
  title     = {The Theory of Incentives: The Principal-Agent Model},
  publisher = {Princeton University Press},
  year      = {2002},
}
@inproceedings{ye2004high,
  author    = {Ye, Hui and Young, Steve},
  title     = {High quality voice morphing},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing},
  pages     = {1--9},
  year      = {2004},
}
@article{greensmith2004variance,
  author  = {Greensmith, Evan and Bartlett, Peter L and Baxter, Jonathan},
  title   = {Variance Reduction Techniques for Gradient Estimates in Reinforcement Learning},
  journal = {Journal of Machine Learning Research},
  volume  = {5},
  number  = {9},
  pages   = {1471--1530},
  year    = {2004},
}
@article{zou2005regularization,
  author  = {Zou, Hui and Hastie, Trevor},
  title   = {Regularization and variable selection via the elastic net},
  journal = {Journal of the Royal Statistical Society: Series B},
  volume  = {67},
  number  = {2},
  pages   = {301--320},
  year    = {2005},
}
@inproceedings{gori2005new,
  author    = {Gori, Marco and Monfardini, Gabriele and Scarselli, Franco},
  title     = {A new model for learning in graph domains},
  booktitle = {IEEE International Joint Conference on Neural Networks},
  volume    = {2},
  pages     = {729--734},
  year      = {2005},
}
@inproceedings{riedmiller2005neural,
  author    = {Riedmiller, Martin},
  title     = {Neural fitted {Q} iteration --- First experiences with a data efficient neural reinforcement learning method},
  booktitle = {European Conference on Machine Learning},
  pages     = {317--328},
  year      = {2005},
}
@article{Allen-et-al-2005,
  author  = {Colin Allen and Iva Smit and Wendell Wallach},
  title   = {Artificial morality: Top-down, bottom-up, and hybrid approaches},
  journal = {Ethics and Information Technology},
  volume  = {7},
  pages   = {149--155},
  year    = {2005},
}
@book{bishop2006pattern,
  author    = {Bishop, Christopher M},
  title     = {Pattern recognition and machine learning},
  publisher = {Springer},
  year      = {2006},
}
@book{williams2006gaussian,
  author    = {Rasmussen, Carl Edward and Williams, Christopher K. I.},
  title     = {Gaussian processes for machine learning},
  publisher = {MIT Press},
  year      = {2006},
}
@incollection{lecun2006tutorial,
  author    = {LeCun, Yann and Chopra, Sumit and Hadsell, Raia and Ranzato, Marc'Aurelio and Huang, Fu Jie},
  title     = {A tutorial on energy-based learning},
  booktitle = {Predicting Structured Data},
  editor    = {Bak{\i}r, G{\"o}khan and Hofmann, Thomas and Smola, Alexander J and Sch{\"o}lkopf, Bernhard and Taskar, Ben},
  publisher = {MIT Press},
  year      = {2006},
}
@article{hinton2006reducing,
  author    = {Hinton, Geoffrey E and Salakhutdinov, Ruslan R},
  title     = {Reducing the dimensionality of data with neural networks},
  journal   = {Science},
  volume    = {313},
  number    = {5786},
  pages     = {504--507},
  publisher = {American Association for the Advancement of Science},
  year      = {2006},
}
% NOTE(review): the citation key below contains a non-ASCII character, which
% classic BibTeX toolchains may reject. It is left unchanged so that existing
% citations keep resolving; consider migrating to an ASCII-only key.
@inproceedings{buciluǎ2006model,
  author    = {Bucilu{\u{a}}, Cristian and Caruana, Rich and Niculescu-Mizil, Alexandru},
  title     = {Model compression},
  booktitle = {ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  pages     = {535--541},
  year      = {2006},
}
@article{Moor-2006,
  author  = {Moor, James},
  title   = {The Nature, Importance, and Difficulty of Machine Ethics},
  journal = {IEEE Intelligent Systems},
  volume  = {21},
  number  = {4},
  pages   = {18--21},
  year    = {2006},
}
@inproceedings{cao2007learning,
  author    = {Cao, Zhe and Qin, Tao and Liu, Tie-Yan and Tsai, Ming-Feng and Li, Hang},
  title     = {Learning to rank: {F}rom pairwise approach to listwise approach},
  booktitle = {International Conference on Machine Learning},
  pages     = {129--136},
  year      = {2007},
}
@inproceedings{xia2008listwise,
  author    = {Xia, Fen and Liu, Tie-Yan and Wang, Jue and Zhang, Wensheng and Li, Hang},
  title     = {Listwise approach to learning to rank: theory and algorithm},
  booktitle = {International Conference on Machine Learning},
  pages     = {1192--1199},
  year      = {2008},
}
@book{griewank2008evaluating,
  author    = {Griewank, Andreas and Walther, Andrea},
  title     = {Evaluating derivatives: {P}rinciples and techniques of algorithmic differentiation},
  publisher = {SIAM},
  year      = {2008},
}
@article{scarselli2008graph,
  author    = {Scarselli, Franco and Gori, Marco and Tsoi, Ah Chung and Hagenbuchner, Markus and Monfardini, Gabriele},
  title     = {The graph neural network model},
  journal   = {IEEE Transactions on Neural Networks},
  volume    = {20},
  number    = {1},
  pages     = {61--80},
  publisher = {IEEE},
  year      = {2008},
}
@inproceedings{vincent2008extracting,
  author    = {Vincent, Pascal and Larochelle, Hugo and Bengio, Yoshua and Manzagol, Pierre-Antoine},
  title     = {Extracting and composing robust features with denoising autoencoders},
  booktitle = {International Conference on Machine Learning},
  pages     = {1096--1103},
  year      = {2008},
}
@article{peters2008reinforcement,
  author    = {Peters, Jan and Schaal, Stefan},
  title     = {Reinforcement learning of motor skills with policy gradients},
  journal   = {Neural Networks},
  volume    = {21},
  number    = {4},
  pages     = {682--697},
  publisher = {Elsevier},
  year      = {2008},
}
@inproceedings{Lehman-Stanley-2008,
  author    = {Joel Lehman and Kenneth O. Stanley},
  title     = {Exploiting Open-endedness to Solve Problems Through the Search for Novelty},
  booktitle = {International Conference on Artificial Life},
  pages     = {329--336},
  year      = {2008},
}
@inproceedings{Arkin-2008a,
  author    = {Ronald C. Arkin},
  title     = {Governing lethal behavior: Embedding ethics in a hybrid deliberative/reactive robot architecture---{P}art {I}: {M}otivation and philosophy},
  booktitle = {ACM/IEEE International Conference on Human Robot Interaction},
  pages     = {121--128},
  year      = {2008},
}
@inproceedings{Arkin-2008b,
  author    = {Ronald C. Arkin},
  title     = {Governing lethal behavior: Embedding ethics in a hybrid deliberative/reactive robot architecture---{P}art {II}: {F}ormalization for ethical control},
  booktitle = {Conference on Artificial General Intelligence},
  pages     = {51--62},
  year      = {2008},
}
@incollection{Anderson-Anderson-2008,
  author    = {Michael Anderson and Susan Leigh Anderson},
  booktitle = {Advanced Computational Intelligence Paradigms in Healthcare 3},
  pages     = {233--257},
  publisher = {Springer},
  series    = {Studies in Computational Intelligence},
  title     = {Ethical healthcare agents},
  volume    = 107,
  year      = 2008
}
@article{Wallach-et-al-2008,
  author  = {Wendell Wallach and Colin Allen and Iva Smit},
  title   = {Machine Morality: Bottom-Up and Top-Down Approaches for Modeling Human Moral Faculties},
  journal = {AI \& Society},
  volume  = {22},
  number  = {4},
  pages   = {565--582},
  year    = {2008},
}
@inproceedings{Narayanan-Shmatikov-2008,
  author    = {Arvind Narayanan and Vitaly Shmatikov},
  booktitle = {IEEE Symposium on Security and Privacy},
  pages     = {111--125},
  title     = {Robust de-anonymization of large sparse datasets},
  year      = 2008
}
@book{koller2009probabilistic,
  author    = {Koller, Daphne and Friedman, Nir},
  publisher = {MIT Press},
  title     = {Probabilistic graphical models: Principles and techniques},
  year      = 2009
}
@inproceedings{jarrett2009what,
  author    = {Jarrett, Kevin and Kavukcuoglu, Koray and Ranzato, Marc'Aurelio and LeCun, Yann},
  booktitle = {IEEE International Conference on Computer Vision},
  doi       = {10.1109/ICCV.2009.5459469},
  pages     = {2146--2153},
  title     = {What is the best multi-stage architecture for object recognition?},
  year      = 2009
}
@inproceedings{chen2009ranking,
  author    = {Chen, Wei and Liu, Tie-Yan and Lan, Yanyan and Ma, Zhi-Ming and Li, Hang},
  booktitle = {Neural Information Processing Systems},
  pages     = {315--323},
  title     = {Ranking measures and loss functions in learning to rank},
  volume    = 22,
  year      = 2009
}
@article{fallah2009nonlinear,
  author    = {Fallah, Nader and Gu, Hong and Mohammad, Kazem and Seyyedsalehi, Seyyed Ali and Nourijelyani, Keramat and Eshraghian, Mohammad Reza},
  title     = {Nonlinear {P}oisson regression using neural networks: {A} simulation study},
  journal   = {Neural Computing and Applications},
  volume    = {18},
  number    = {8},
  pages     = {939--943},
  publisher = {Springer},
  year      = {2009},
}
@article{mezard2008constraint,
  author    = {M{\'e}zard, Marc and Mora, Thierry},
  journal   = {Journal of Physiology-Paris},
  number    = {1--2},
  pages     = {107--113},
  publisher = {Elsevier},
  title     = {Constraint satisfaction problems and neural networks: A statistical physics perspective},
  volume    = 103,
  year      = 2009
}
@inproceedings{gales2009support,
  author    = {Gales, Mark JF and Ragni, Anton and AlDamarki, H and Gautier, C},
  booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding},
  pages     = {205--210},
  title     = {Support vector machines for noise robust {ASR}},
  year      = 2009
}
@inproceedings{deng2009imagenet,
  author    = {Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li},
  title     = {Image{N}et: A large-scale hierarchical image database},
  booktitle = {IEEE Computer Vision \& Pattern Recognition},
  pages     = {248--255},
  year      = {2009},
}
@techreport{erhan2009visualizing,
  author      = {Erhan, Dumitru and Bengio, Yoshua and Courville, Aaron and Vincent, Pascal},
  institution = {University of Montreal},
  number      = 1341,
  title       = {Visualizing higher-layer features of a deep network},
  year        = 2009
}
@techreport{krizhevsky2009learning,
  author      = {Krizhevsky, Alex and Hinton, Geoffrey},
  institution = {University of Toronto},
  title       = {Learning multiple layers of features from tiny images},
  year        = 2009
}
@inproceedings{nair2010rectified,
  author    = {Nair, Vinod and Hinton, Geoffrey E},
  title     = {Rectified linear units improve restricted {B}oltzmann machines},
  booktitle = {International Conference on Machine Learning},
  pages     = {807--814},
  year      = {2010},
}
@inproceedings{glorot2010understanding,
  author    = {Glorot, Xavier and Bengio, Yoshua},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  pages     = {249--256},
  title     = {Understanding the difficulty of training deep feedforward neural networks},
  volume    = 9,
  year      = 2010
}
@misc{domke2010notes,
  author       = {Justin Domke},
  title        = {Statistical machine learning},
  howpublished = {\url{https://people.cs.umass.edu/~domke/}},
  year         = {2010},
}
@article{aiken2010efficacy,
  author  = {Aiken, Milam and Park, Mina},
  title   = {The efficacy of round-trip translation for {MT} evaluation},
  journal = {Translation Journal},
  volume  = {14},
  number  = {1},
  year    = {2010},
}
@inproceedings{scherer2010evaluation,
  author    = {Scherer, Dominik and M{\"u}ller, Andreas and Behnke, Sven},
  title     = {Evaluation of pooling operations in convolutional architectures for object recognition},
  booktitle = {International Conference on Artificial Neural Networks},
  pages     = {92--101},
  year      = {2010},
}
@article{tabak2010density,
  author    = {Tabak, Esteban G and Vanden-Eijnden, Eric},
  title     = {Density estimation by dual ascent of the log-likelihood},
  journal   = {Communications in Mathematical Sciences},
  volume    = {8},
  number    = {1},
  pages     = {217--233},
  publisher = {International Press of Boston},
  year      = {2010},
}
@inproceedings{hasselt2010double,
  author    = {van Hasselt, Hado},
  booktitle = {Neural Information Processing Systems},
  pages     = {2613--2621},
  title     = {Double {Q}-learning},
  volume    = 23,
  year      = 2010
}
@inproceedings{morimura2010nonparametric,
  author    = {Morimura, Tetsuro and Sugiyama, Masashi and Kashima, Hisashi and Hachiya, Hirotaka and Tanaka, Toshiyuki},
  title     = {Nonparametric return distribution approximation for reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {799--806},
  year      = {2010},
}
@incollection{Keynes-2010,
  author    = {John Maynard Keynes},
  title     = {Economic Possibilities for Our Grandchildren},
  booktitle = {Essays in Persuasion},
  pages     = {321--332},
  publisher = {Palgrave Macmillan},
  year      = {2010},
}
@inproceedings{glorot2011deep,
  author    = {Glorot, Xavier and Bordes, Antoine and Bengio, Yoshua},
  title     = {Deep sparse rectifier neural networks},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  pages     = {315--323},
  year      = {2011},
}
@article{duchi2011adaptive,
  author  = {Duchi, John and Hazan, Elad and Singer, Yoram},
  title   = {Adaptive subgradient methods for online learning and stochastic optimization},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {2121--2159},
  year    = {2011},
}
@inproceedings{niu2011hogwild,
  author    = {Recht, Benjamin and R{\'e}, Christopher and Wright, Stephen and Niu, Feng},
  booktitle = {Neural Information Processing Systems},
  pages     = {693--701},
  title     = {Hogwild!: A lock-free approach to parallelizing stochastic gradient descent},
  volume    = 24,
  year      = 2011
}
@inproceedings{hutter2011sequential,
  author    = {Hutter, Frank and Hoos, Holger H and Leyton-Brown, Kevin},
  title     = {Sequential model-based optimization for general algorithm configuration},
  booktitle = {International Conference on Learning and Intelligent Optimization},
  pages     = {507--523},
  year      = {2011},
}
@inproceedings{bergstra2011algorithms,
  author    = {Bergstra, James S and Bardenet, R{\'e}mi and Bengio, Yoshua and K{\'e}gl, Bal{\'a}zs},
  title     = {Algorithms for hyper-parameter optimization},
  booktitle = {Neural Information Processing Systems},
  volume    = {24},
  pages     = {2546--2554},
  year      = {2011},
}
@inproceedings{welling2011bayesian,
  author    = {Welling, Max and Teh, Yee W},
  title     = {Bayesian learning via stochastic gradient {L}angevin dynamics},
  booktitle = {International Conference on Machine Learning},
  pages     = {681--688},
  year      = {2011},
}
@inproceedings{kolomiyets2011model,
  author    = {Kolomiyets, Oleksandr and Bethard, Steven and Moens, Marie-Francine},
  title     = {Model-portability experiments for textual temporal analysis},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {271--276},
  year      = {2011},
}
@inproceedings{zeiler2011adaptive,
  author    = {Zeiler, Matthew D and Taylor, Graham W and Fergus, Rob},
  title     = {Adaptive deconvolutional networks for mid and high level feature learning},
  booktitle = {IEEE International Conference on Computer Vision},
  pages     = {2018--2025},
  year      = {2011},
}
@article{Vallor-2011,
  author  = {Shannon Vallor},
  title   = {Carebots and caregivers: {S}ustaining the ethical ideal of care in the 21st century},
  journal = {Philosophy and Technology},
  volume  = {24},
  number  = {3},
  pages   = {251--268},
  year    = {2011},
}
@book{Kitcher-2011a,
  author    = {Philip Kitcher},
  title     = {The Ethical Project},
  publisher = {Harvard University Press},
  year      = {2011},
}
@book{Kitcher-2011b,
  author    = {Philip Kitcher},
  title     = {Science in a Democratic Society},
  publisher = {Prometheus Books},
  year      = {2011},
}
@book{prince2012computer,
  author    = {Prince, Simon Jeremy Damion},
  title     = {Computer vision: Models, learning, and inference},
  publisher = {Cambridge University Press},
  year      = {2012},
}
@inproceedings{krizhevsky2012imagenet,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
  booktitle = {Neural Information Processing Systems},
  pages     = {1097--1105},
  title     = {Image{N}et Classification with Deep Convolutional Neural Networks},
  volume    = 25,
  year      = 2012
}
@incollection{Bottou2012stochastic,
  abstract  = {Chapter 1 strongly advocates the stochastic back-propagation method to train neural networks. This is in fact an instance of a more general technique called stochastic gradient descent (SGD). This chapter provides background material, explains why SGD is a good learning algorithm when the training set is large, and provides useful recommendations.},
  author    = {Bottou, L{\'e}on},
  booktitle = {Neural Networks: Tricks of the Trade},
  edition   = {Second},
  pages     = {421--436},
  publisher = {Springer},
  title     = {Stochastic Gradient Descent Tricks},
  year      = 2012
}
@misc{hinton2012rmsprop,
  author       = {Geoffrey Hinton and Nitish Srivastava and Kevin Swersky},
  title        = {Neural Networks for Machine Learning: {L}ecture 6a -- {O}verview of mini-batch gradient descent},
  howpublished = {\url{https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf}},
  year         = {2012},
}
@article{zeiler2012adadelta,
  author  = {Zeiler, Matthew D},
  title   = {{ADADELTA}: {A}n adaptive learning rate method},
  journal = {arXiv:1212.5701},
  year    = {2012},
}
@incollection{lecun2012efficient,
  author    = {LeCun, Yann A and Bottou, L{\'e}on and Orr, Genevieve B and M{\"u}ller, Klaus-Robert},
  title     = {Efficient backprop},
  booktitle = {Neural Networks: Tricks of the trade},
  pages     = {9--48},
  publisher = {Springer},
  year      = {2012},
}
@article{moreno2012unifying,
  author    = {Moreno-Torres, Jose G and Raeder, Troy and Alaiz-Rodr{\'\i}guez, Roc{\'\i}o and Chawla, Nitesh V and Herrera, Francisco},
  title     = {A unifying view on dataset shift in classification},
  journal   = {Pattern Recognition},
  volume    = {45},
  number    = {1},
  pages     = {521--530},
  publisher = {Elsevier},
  year      = {2012},
}
@article{bergstra2012random,
  author  = {Bergstra, James and Bengio, Yoshua},
  title   = {Random search for hyper-parameter optimization},
  journal = {Journal of Machine Learning Research},
  volume  = {13},
  number  = {10},
  pages   = {281--305},
  year    = {2012},
}
@inproceedings{snoek2012practical,
  author    = {Snoek, Jasper and Larochelle, Hugo and Adams, Ryan P},
  title     = {Practical {B}ayesian optimization of machine learning algorithms},
  booktitle = {Neural Information Processing Systems},
  volume    = {25},
  pages     = {2951--2959},
  year      = {2012},
}
@article{hinton2012improving,
  author  = {Hinton, Geoffrey E and Srivastava, Nitish and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan R},
  title   = {Improving neural networks by preventing co-adaptation of feature detectors},
  journal = {arXiv:1207.0580},
  year    = {2012},
}
@inproceedings{abdel2012applying,
  author    = {Abdel-Hamid, Ossama and Mohamed, Abdel-rahman and Jiang, Hui and Penn, Gerald},
  title     = {Applying convolutional neural networks concepts to hybrid {NN-HMM} model for speech recognition},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  pages     = {4277--4280},
  year      = {2012},
}
@article{ji20123d,
  author  = {Ji, Shuiwang and Xu, Wei and Yang, Ming and Yu, Kai},
  title   = {{3D} convolutional neural networks for human action recognition},
  journal = {IEEE Transactions on Pattern Analysis \& Machine Intelligence},
  volume  = {35},
  number  = {1},
  pages   = {221--231},
  year    = {2012},
}
@inproceedings{schuster2012japanese,
  author    = {Schuster, Mike and Nakajima, Kaisuke},
  title     = {Japanese and {K}orean voice search},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  pages     = {5149--5152},
  year      = {2012},
}
@article{Sharkey-Sharkey-2012,
  author  = {Amanda Sharkey and Noel Sharkey},
  title   = {Granny and the robots: {E}thical issues in robot care for the elderly},
  journal = {Ethics and Information Technology},
  volume  = {14},
  number  = {1},
  pages   = {27--40},
  year    = {2012},
}
@article{Asaro-2012,
  author  = {Peter Asaro},
  title   = {On banning autonomous weapon systems: human rights, automation, and the dehumanization of lethal decision-making},
  journal = {International Review of the Red Cross},
  volume  = {94},
  number  = {886},
  pages   = {687--709},
  year    = {2012},
}
@inproceedings{maas2013rectifier,
  author    = {Maas, Andrew L and Hannun, Awni Y and Ng, Andrew Y},
  title     = {Rectifier nonlinearities improve neural network acoustic models},
  booktitle = {ICML Workshop on Deep Learning for Audio, Speech, and Language Processing},
  year      = {2013},
}
@article{pascanu2014number,
  author  = {Pascanu, Razvan and Mont{\'u}far, Guido and Bengio, Yoshua},
  title   = {On the number of response regions of deep feed forward networks with piece-wise linear activations},
  journal = {arXiv:1312.6098},
  year    = {2013},
}
@inproceedings{johnson2013accelerating,
  author    = {Johnson, Rie and Zhang, Tong},
  booktitle = {Neural Information Processing Systems},
  pages     = {315--323},
  title     = {Accelerating stochastic gradient descent using predictive variance reduction},
  volume    = 26,
  year      = 2013
}
@inproceedings{sutskever2013importance,
  author    = {Sutskever, Ilya and Martens, James and Dahl, George and Hinton, Geoffrey},
  title     = {On the importance of initialization and momentum in deep learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {1139--1147},
  year      = {2013},
}
@inproceedings{wan2013regularization,
  author    = {Wan, Li and Zeiler, Matthew and Zhang, Sixin and LeCun, Yann and Fergus, Rob},
  booktitle = {International Conference on Machine Learning},
  pages     = {1058--1066},
  title     = {Regularization of neural networks using {D}rop{C}onnect},
  year      = 2013
}
@inproceedings{jaitly2013vocal,
  author    = {Jaitly, Navdeep and Hinton, Geoffrey E},
  booktitle = {ICML Workshop on Deep Learning for Audio, Speech, and Language Processing},
  title     = {Vocal tract length perturbation ({VTLP}) improves speech recognition},
  year      = 2013
}
@inproceedings{kanda2013elastic,
  author    = {Kanda, Naoyuki and Takeda, Ryu and Obuchi, Yasunari},
  title     = {Elastic spectral distortion for low resource speech recognition with deep neural networks},
  booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding},
  pages     = {309--314},
  year      = {2013},
}
@inproceedings{sainath2013improvements,
  author    = {Sainath, Tara N and Kingsbury, Brian and Mohamed, Abdel-rahman and Dahl, George E and Saon, George and Soltau, Hagen and Beran, Tomas and Aravkin, Aleksandr Y and Ramabhadran, Bhuvana},
  title     = {Improvements to deep convolutional neural networks for {LVCSR}},
  booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding},
  pages     = {315--320},
  year      = {2013},
}
@inproceedings{sifre2013rotation,
  author    = {Sifre, Laurent and Mallat, St{\'e}phane},
  title     = {Rotation, scaling and deformation invariant scattering for texture discrimination},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {1233--1240},
  year      = {2013},
}
@inproceedings{bruna2013spectral,
  author    = {Bruna, Joan and Zaremba, Wojciech and Szlam, Arthur and LeCun, Yann},
  booktitle = {International Conference on Learning Representations},
  title     = {Spectral networks and locally connected networks on graphs},
  year      = 2013
}
@article{tabak2013family,
  author    = {Tabak, Esteban G and Turner, Cristina V},
  title     = {A family of nonparametric density estimation algorithms},
  journal   = {Communications on Pure and Applied Mathematics},
  volume    = {66},
  number    = {2},
  pages     = {145--164},
  publisher = {Wiley Online Library},
  year      = {2013},
}
@article{rippel2013high,
  author  = {Rippel, Oren and Adams, Ryan Prescott},
  title   = {High-dimensional probability estimation with deep density models},
  journal = {arXiv:1302.5125},
  year    = {2013},
}
@incollection{barto2013intrinsic,
  author    = {Barto, Andrew G},
  booktitle = {Intrinsically Motivated Learning in Natural and Artificial Systems},
  pages     = {17--47},
  publisher = {Springer},
  title     = {Intrinsic motivation and reinforcement learning},
  year      = 2013
}
@article{Kosinski-et-al-2013,
  author  = {Michal Kosinski and David Stillwell and Thore Graepel},
  title   = {Private traits and attributes are predictable from digital records of human behavior},
  journal = {Proceedings of the National Academy of Sciences of the United States of America},
  volume  = {110},
  number  = {15},
  pages   = {5802--5805},
  year    = {2013},
}
@inproceedings{montufar2014number,
  author    = {Mont{\'u}far, Guido F and Pascanu, Razvan and Cho, Kyunghyun and Bengio, Yoshua},
  booktitle = {Neural Information Processing Systems},
  pages     = {2924--2932},
  title     = {On the number of linear regions of deep neural networks},
  volume    = 27,
  year      = 2014
}
@article{srivastava2014dropout,
  author    = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  journal   = {Journal of Machine Learning Research},
  number    = 1,
  pages     = {1929--1958},
  publisher = {JMLR.org},
  title     = {Dropout: {A} simple way to prevent neural networks from overfitting},
  volume    = 15,
  year      = 2014
}
@inproceedings{chen2014stochastic,
  author    = {Chen, Tianqi and Fox, Emily and Guestrin, Carlos},
  title     = {Stochastic gradient {H}amiltonian {M}onte {C}arlo},
  booktitle = {International Conference on Machine Learning},
  pages     = {1683--1691},
  year      = {2014},
}
@inproceedings{sharif2014cnn,
  author    = {Sharif Razavian, Ali and Azizpour, Hossein and Sullivan, Josephine and Carlsson, Stefan},
  title     = {{CNN} features off-the-shelf: {A}n astounding baseline for recognition},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshop},
  pages     = {806--813},
  year      = {2014},
}
@article{hannun2014deep,
  author  = {Awni Y. Hannun and Carl Case and Jared Casper and Bryan Catanzaro and Greg Diamos and Erich Elsen and Ryan Prenger and Sanjeev Satheesh and Shubho Sengupta and Adam Coates and Andrew Y. Ng},
  title   = {Deep speech: Scaling up end-to-end speech recognition},
  journal = {arXiv:1412.5567},
  year    = {2014},
}
@inproceedings{simonyan2014very,
  author    = {Simonyan, Karen and Zisserman, Andrew},
  booktitle = {International Conference on Learning Representations},
  title     = {Very deep convolutional networks for large-scale image recognition},
  year      = 2014
}
@inproceedings{lin2013network,
  author    = {Lin, Min and Chen, Qiang and Yan, Shuicheng},
  booktitle = {International Conference on Learning Representations},
  title     = {Network in network},
  year      = 2014
}
@inproceedings{zeiler2014visualizing,
  author    = {Zeiler, Matthew D and Fergus, Rob},
  title     = {Visualizing and understanding convolutional networks},
  booktitle = {European Conference on Computer Vision},
  pages     = {818--833},
  year      = {2014},
}
@inproceedings{kanazawa2014locally,
  author    = {Kanazawa, Angjoo and Sharma, Abhishek and Jacobs, David},
  booktitle = {Neural Information Processing Systems Workshop},
  title     = {Locally scale-invariant convolutional neural networks},
  year      = 2014
}
@inproceedings{girshick2014rich,
  author    = {Girshick, Ross and Donahue, Jeff and Darrell, Trevor and Malik, Jitendra},
  title     = {Rich feature hierarchies for accurate object detection and semantic segmentation},
  booktitle = {IEEE Computer Vision \& Pattern Recognition},
  pages     = {580--587},
  year      = {2014},
}
@inproceedings{cho2014properties,
  author    = {Kyunghyun Cho and Bart van Merrienboer and Dzmitry Bahdanau and Yoshua Bengio},
  title     = {On the Properties of Neural Machine Translation: Encoder-Decoder Approaches},
  booktitle = {ACL Workshop on Syntax, Semantics and Structure in Statistical Translation},
  pages     = {103--111},
  year      = {2014},
}
@inproceedings{chung2014empirical,
  author    = {Chung, Junyoung and Gulcehre, Caglar and Cho, KyungHyun and Bengio, Yoshua},
  booktitle = {Deep Learning and Representation Workshop},
  title     = {Empirical evaluation of gated recurrent neural networks on sequence modeling},
  year      = 2014
}
@inproceedings{goodfellow2014generative,
  author    = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
  booktitle = {Neural Information Processing Systems},
  pages     = {2672--2680},
  title     = {Generative adversarial nets},
  volume    = 27,
  year      = 2014
}
@inproceedings{kingma2013auto,
  author    = {Kingma, Diederik P and Welling, Max},
  booktitle = {International Conference on Learning Representations},
  title     = {Auto-encoding variational {B}ayes},
  year      = 2014
}
@article{mirza2014conditional,
  author  = {Mirza, Mehdi and Osindero, Simon},
  title   = {Conditional generative adversarial nets},
  journal = {arXiv:1411.1784},
  year    = {2014},
}
@inproceedings{rezende2014stochastic,
  author    = {Rezende, Danilo Jimenez and Mohamed, Shakir and Wierstra, Daan},
  title     = {Stochastic backpropagation and approximate inference in deep generative models},
  booktitle = {International Conference on Machine Learning},
  pages     = {1278--1286},
  year      = {2014},
}
@inproceedings{silver2014deterministic,
  author    = {Silver, David and Lever, Guy and Heess, Nicolas and Degris, Thomas and Wierstra, Daan and Riedmiller, Martin},
  title     = {Deterministic policy gradient algorithms},
  booktitle = {International Conference on Machine Learning},
  pages     = {387--395},
  year      = {2014},
}
@inproceedings{livni2014computational,
  author    = {Livni, Roi and Shalev-Shwartz, Shai and Shamir, Ohad},
  booktitle = {Neural Information Processing Systems},
  pages     = {855--863},
  title     = {On the computational efficiency of training neural networks},
  volume    = 27,
  year      = 2014
}
@article{pascanu2014saddle,
  author  = {Pascanu, Razvan and Dauphin, Yann N and Ganguli, Surya and Bengio, Yoshua},
  title   = {On the saddle point problem for non-convex optimization},
  journal = {arXiv:1405.4604},
  year    = {2014},
}
@inproceedings{dauphin2014identifying,
  author    = {Yann N. Dauphin and Razvan Pascanu and {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and KyungHyun Cho and Surya Ganguli and Yoshua Bengio},
  title     = {Identifying and attacking the saddle point problem in high-dimensional non-convex optimization},
  booktitle = {Neural Information Processing Systems},
  volume    = {27},
  pages     = {2933--2941},
  year      = {2014},
}
@inproceedings{szegedy2013intriguing,
  author    = {Szegedy, Christian and Zaremba, Wojciech and Sutskever, Ilya and Bruna, Joan and Erhan, Dumitru and Goodfellow, Ian and Fergus, Rob},
  booktitle = {International Conference on Learning Representations},
  title     = {Intriguing properties of neural networks},
  year      = 2014
}
@inproceedings{noh2015learning,
  author    = {Noh, Hyeonwoo and Hong, Seunghoon and Han, Bohyung},
  title     = {Learning deconvolution network for semantic segmentation},
  booktitle = {IEEE International Conference on Computer Vision},
  pages     = {1520--1528},
  year      = {2015},
}
@article{david2015there,
  author  = {Autor, David H},
  journal = {Journal of Economic Perspectives},
  number  = 3,
  pages   = {3--30},
  title   = {Why are there still so many jobs? {T}he history and future of workplace automation},
  volume  = 29,
  year    = 2015
}
@inproceedings{he2015delving,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Delving deep into rectifiers: Surpassing human-level performance on {I}mage{N}et classification},
  booktitle = {IEEE International Conference on Computer Vision},
  pages     = {1026--1034},
  year      = {2015},
}
@article{clevert2015fast,
  author  = {Clevert, Djork-Arn{\'e} and Unterthiner, Thomas and Hochreiter, Sepp},
  title   = {Fast and accurate deep network learning by exponential linear units ({ELU}s)},
  journal = {arXiv:1511.07289},
  year    = {2015},
}
@article{lecun2015deep,
  author    = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
  title     = {Deep learning},
  journal   = {Nature},
  volume    = {521},
  number    = {7553},
  pages     = {436--444},
  publisher = {Nature Publishing Group},
  year      = {2015},
}
@inproceedings{kingma2014adam,
  author    = {Diederik P. Kingma and Jimmy Ba},
  title     = {Adam: {A} Method for Stochastic Optimization},
  booktitle = {International Conference on Learning Representations},
  year      = {2015},
}
@inproceedings{ioffe2015batch,
  author    = {Ioffe, Sergey and Szegedy, Christian},
  booktitle = {International Conference on Machine Learning},
  pages     = {448--456},
  title     = {Batch normalization: Accelerating deep network training by reducing internal covariate shift},
  year      = 2015
}
@article{xing2015petuum,
  author    = {Xing, Eric P and Ho, Qirong and Dai, Wei and Kim, Jin Kyu and Wei, Jinliang and Lee, Seunghak and Zheng, Xun and Xie, Pengtao and Kumar, Abhimanu and Yu, Yaoliang},
  title     = {Petuum: A new platform for distributed machine learning on big data},
  journal   = {IEEE Transactions on Big Data},
  volume    = {1},
  number    = {2},
  pages     = {49--67},
  publisher = {IEEE},
  year      = {2015},
}
@article{wu2015deep,
  author  = {Wu, Ren and Yan, Shengen and Shan, Yi and Dang, Qingqing and Sun, Gang},
  journal = {arXiv:1501.02876},
  title   = {Deep image: Scaling up image recognition},
  year    = 2015
}
@article{xu2015empirical,
  author  = {Bing Xu and Naiyan Wang and Tianqi Chen and Mu Li},
  title   = {Empirical Evaluation of Rectified Activations in Convolutional Network},
  journal = {arXiv:1505.00853},
  year    = {2015},
}
@inproceedings{blundell2015weight,
  author    = {Blundell, Charles and Cornebise, Julien and Kavukcuoglu, Koray and Wierstra, Daan},
  title     = {Weight uncertainty in neural network},
  booktitle = {International Conference on Machine Learning},
  pages     = {1613--1622},
  year      = {2015},
}
@inproceedings{ma2015complete,
  author    = {Ma, Yi-An and Chen, Tianqi and Fox, Emily},
  booktitle = {Neural Information Processing Systems},
  pages     = {2917--2925},
  title     = {A complete recipe for stochastic gradient {MCMC}},
  volume    = 28,
  year      = 2015
}
@inproceedings{doersch2015unsupervised,
  author    = {Doersch, Carl and Gupta, Abhinav and Efros, Alexei A},
  title     = {Unsupervised visual representation learning by context prediction},
  booktitle = {IEEE International Conference on Computer Vision},
  pages     = {1422--1430},
  year      = {2015},
}
@inproceedings{goodfellow2014explaining,
  author    = {Ian J. Goodfellow and Jonathon Shlens and Christian Szegedy},
  title     = {Explaining and Harnessing Adversarial Examples},
  booktitle = {International Conference on Learning Representations},
  year      = {2015},
}
@article{russakovsky2015imagenet,
  author  = {Russakovsky, Olga and Deng, Jia and Su, Hao and Krause, Jonathan and Satheesh, Sanjeev and Ma, Sean and Huang, Zhiheng and Karpathy, Andrej and Khosla, Aditya and Bernstein, Michael and others},
  title   = {Image{N}et large scale visual recognition challenge},
  journal = {International Journal of Computer Vision},
  volume  = {115},
  number  = {3},
  pages   = {211--252},
  year    = {2015},
}
@inproceedings{yu2015multi,
  author    = {Yu, Fisher and Koltun, Vladlen},
  booktitle = {International Conference on Learning Representations},
  title     = {Multi-scale context aggregation by dilated convolutions},
  year      = 2015
}
@inproceedings{long2015fully,
  author    = {Long, Jonathan and Shelhamer, Evan and Darrell, Trevor},
  title     = {Fully convolutional networks for semantic segmentation},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {3431--3440},
  year      = {2015},
}
@inproceedings{springenberg2014striving,
  author    = {Springenberg, Jost Tobias and Dosovitskiy, Alexey and Brox, Thomas and Riedmiller, Martin},
  booktitle = {International Conference on Learning Representations},
  title     = {Striving for simplicity: {T}he all convolutional net},
  year      = 2015
}
@inproceedings{zhang2015character,
  author    = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann},
  booktitle = {Neural Information Processing Systems},
  pages     = {649--657},
  title     = {Character-level convolutional networks for text classification},
  volume    = 28,
  year      = 2015
}
@inproceedings{kiranyaz2015convolutional,
  author    = {Kiranyaz, Serkan and Ince, Turker and Hamila, Ridha and Gabbouj, Moncef},
  booktitle = {International Conference of the IEEE Engineering in Medicine and Biology Society},
  pages     = {2608--2611},
  title     = {Convolutional neural networks for patient-specific {ECG} classification},
  year      = 2015
}
@inproceedings{tran2015learning,
  author    = {Tran, Du and Bourdev, Lubomir and Fergus, Rob and Torresani, Lorenzo and Paluri, Manohar},
  title     = {Learning spatiotemporal features with 3{D} convolutional networks},
  booktitle = {IEEE International Conference on Computer Vision},
  pages     = {4489--4497},
  year      = {2015},
}
@inproceedings{wu20153d,
  author    = {Wu, Zhirong and Song, Shuran and Khosla, Aditya and Yu, Fisher and Zhang, Linguang and Tang, Xiaoou and Xiao, Jianxiong},
  title     = {3{D} {S}hape{N}ets: A deep representation for volumetric shapes},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {1912--1920},
  year      = {2015},
}
@inproceedings{maturana2015voxnet,
  author    = {Maturana, Daniel and Scherer, Sebastian},
  title     = {Vox{N}et: A {3D} convolutional neural network for real-time object recognition},
  booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems},
  pages     = {922--928},
  year      = {2015},
}
@inproceedings{tompson2015efficient,
  author    = {Tompson, Jonathan and Goroshin, Ross and Jain, Arjun and LeCun, Yann and Bregler, Christoph},
  title     = {Efficient object localization using convolutional networks},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {648--656},
  year      = {2015},
}
@inproceedings{wu2015max,
  author    = {Wu, Haibing and Gu, Xiaodong},
  booktitle = {International Conference on Neural Information Processing},
  pages     = {46--54},
  series    = {Lecture Notes in Computer Science},
  title     = {Max-pooling dropout for regularization of convolutional neural networks},
  volume    = 9489,
  year      = 2015
}
@inproceedings{girshick2015fast,
  author    = {Girshick, Ross},
  title     = {Fast {R-CNN}},
  booktitle = {IEEE International Conference on Computer Vision},
  pages     = {1440--1448},
  year      = {2015},
}
@inproceedings{ren2015faster,
  author    = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
  booktitle = {Neural Information Processing Systems},
  pages     = {91--99},
  title     = {Faster {R-CNN}: Towards real-time object detection with region proposal networks},
  volume    = 28,
  year      = 2015
}
@inproceedings{ronneberger2015u,
  author    = {Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas},
  title     = {U-{N}et: Convolutional networks for biomedical image segmentation},
  booktitle = {International Conference on Medical Image Computing and Computer-Assisted Intervention},
  pages     = {234--241},
  year      = {2015},
}
@inproceedings{mahendran2015understanding,
  author    = {Mahendran, Aravindh and Vedaldi, Andrea},
  title     = {Understanding deep image representations by inverting them},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {5188--5196},
  year      = {2015},
}
@article{srivastava2015highway,
  author  = {Srivastava, Rupesh Kumar and Greff, Klaus and Schmidhuber, J{\"u}rgen},
  title   = {Highway networks},
  journal = {arXiv:1505.00387},
  year    = {2015},
}
@inproceedings{bahdanau2014neural,
  author    = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
  booktitle = {International Conference on Learning Representations},
  title     = {Neural machine translation by jointly learning to align and translate},
  year      = 2015
}
@inproceedings{sennrich2015neural,
  author    = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {1715--1725},
  title     = {Neural machine translation of rare words with subword units},
  year      = 2016
}
@inproceedings{luong2015effective,
  author    = {Luong, Minh-Thang and Pham, Hieu and Manning, Christopher D},
  booktitle = {Empirical Methods in Natural Language Processing},
  pages     = {1412--1421},
  title     = {Effective approaches to attention-based neural machine translation},
  year      = 2015
}
@article{henaff2015deep,
  author  = {Henaff, Mikael and Bruna, Joan and LeCun, Yann},
  title   = {Deep convolutional networks on graph-structured data},
  journal = {arXiv:1506.05163},
  year    = {2015},
}
@inproceedings{duvenaud2015convolutional,
  author    = {Duvenaud, David K and Maclaurin, Dougal and Iparraguirre, Jorge and Bombarell, Rafael and Hirzel, Timothy and Aspuru-Guzik, Al{\'a}n and Adams, Ryan P},
  booktitle = {Neural Information Processing Systems},
  pages     = {2224--2232},
  title     = {Convolutional networks on graphs for learning molecular fingerprints},
  volume    = 28,
  year      = 2015
}
@inproceedings{masci2015geodesic,
  author    = {Masci, Jonathan and Boscaini, Davide and Bronstein, Michael and Vandergheynst, Pierre},
  title     = {Geodesic convolutional neural networks on {R}iemannian manifolds},
  booktitle = {IEEE International Conference on Computer Vision Workshop},
  pages     = {832--840},
  year      = {2015},
}
@inproceedings{rezende2015variational,
  author    = {Rezende, Danilo Jimenez and Mohamed, Shakir},
  booktitle = {International Conference on Machine Learning},
  pages     = {1530--1538},
  title     = {Variational inference with normalizing flows},
  year      = 2015
}
@inproceedings{sohl2015deep,
  author    = {Sohl-Dickstein, Jascha and Weiss, Eric and Maheswaranathan, Niru and Ganguli, Surya},
  title     = {Deep unsupervised learning using nonequilibrium thermodynamics},
  booktitle = {International Conference on Machine Learning},
  pages     = {2256--2265},
  year      = {2015},
}
@inproceedings{radford2015unsupervised,
  author    = {Radford, Alec and Metz, Luke and Chintala, Soumith},
  booktitle = {International Conference on Learning Representations},
  title     = {Unsupervised representation learning with deep convolutional generative adversarial networks},
  year      = 2016
}
@inproceedings{denton2015deep,
  author    = {Denton, Emily L and Chintala, Soumith and Szlam, Arthur and Fergus, Rob},
  booktitle = {Neural Information Processing Systems},
  pages     = {1486--1494},
  title     = {Deep generative image models using a {L}aplacian pyramid of adversarial networks},
  volume    = 28,
  year      = 2015
}
@inproceedings{dinh2014nice,
  author    = {Dinh, Laurent and Krueger, David and Bengio, Yoshua},
  booktitle = {International Conference on Learning Representations Workshop},
  title     = {N{ICE}: Non-linear independent components estimation},
  year      = 2015
}
@inproceedings{germain2015made,
  author    = {Germain, Mathieu and Gregor, Karol and Murray, Iain and Larochelle, Hugo},
  title     = {{MADE}: Masked autoencoder for distribution estimation},
  booktitle = {International Conference on Machine Learning},
  pages     = {881--889},
  year      = {2015},
}
@inproceedings{bowman2015generating,
  author    = {Bowman, Samuel R and Vilnis, Luke and Vinyals, Oriol and Dai, Andrew M and Jozefowicz, Rafal and Bengio, Samy},
  booktitle = {ACL Conference on Computational Natural Language Learning},
  pages     = {10--21},
  title     = {Generating sentences from a continuous space},
  year      = 2016
}
@inproceedings{heess2015learning,
  author    = {Heess, Nicolas and Wayne, Gregory and Silver, David and Lillicrap, Timothy and Erez, Tom and Tassa, Yuval},
  booktitle = {Neural Information Processing Systems},
  pages     = {2944--2952},
  title     = {Learning continuous control policies by stochastic value gradients},
  volume    = 28,
  year      = 2015
}
@inproceedings{chung2015recurrent,
  author    = {Chung, Junyoung and Kastner, Kyle and Dinh, Laurent and Goel, Kratarth and Courville, Aaron C and Bengio, Yoshua},
  booktitle = {Neural Information Processing Systems},
  pages     = {2980--2988},
  title     = {A recurrent latent variable model for sequential data},
  volume    = 28,
  year      = 2015
}
@inproceedings{sohn2015learning,
  author    = {Sohn, Kihyuk and Lee, Honglak and Yan, Xinchen},
  booktitle = {Neural Information Processing Systems},
  pages     = {3483--3491},
  title     = {Learning structured output representation using deep conditional generative models},
  volume    = 28,
  year      = 2015
}
@inproceedings{salimans2015markov,
  author    = {Salimans, Tim and Kingma, Diederik and Welling, Max},
  title     = {Markov chain {M}onte {C}arlo and variational inference: Bridging the gap},
  booktitle = {International Conference on Machine Learning},
  pages     = {1218--1226},
  year      = {2015},
}
@article{makhzani2015adversarial,
  author  = {Makhzani, Alireza and Shlens, Jonathon and Jaitly, Navdeep and Goodfellow, Ian and Frey, Brendan},
  title   = {Adversarial autoencoders},
  journal = {arXiv:1511.05644},
  year    = {2015},
}
@inproceedings{liu2015deep,
  author    = {Liu, Ziwei and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
  title     = {Deep learning face attributes in the wild},
  booktitle = {IEEE International Conference on Computer Vision},
  pages     = {3730--3738},
  year      = {2015},
}
@inproceedings{kingma2015variational,
  author    = {Kingma, Durk P and Salimans, Tim and Welling, Max},
  booktitle = {Neural Information Processing Systems},
  pages     = {2575--2583},
  title     = {Variational dropout and the local reparameterization trick},
  volume    = 28,
  year      = 2015
}
@article{mnih2015human,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  publisher = {Nature Publishing Group},
  year      = {2015},
}
@inproceedings{hausknecht2015deep,
  author    = {Hausknecht, Matthew and Stone, Peter},
  title     = {Deep recurrent {Q}-learning for partially observable {MDP}s},
  booktitle = {AAAI Fall Symposia},
  pages     = {29--37},
  year      = {2015},
}
@inproceedings{schulman2015trust,
  author    = {Schulman, John and Levine, Sergey and Abbeel, Pieter and Jordan, Michael and Moritz, Philipp},
  title     = {Trust region policy optimization},
  booktitle = {International Conference on Machine Learning},
  pages     = {1889--1897},
  year      = {2015},
}
@inproceedings{choromanska2015loss,
  author    = {Choromanska, Anna and Henaff, Mikael and Mathieu, Michael and Ben Arous, G{\'e}rard and LeCun, Yann},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  title     = {The Loss Surfaces of Multilayer Networks},
  year      = 2015
}
@inproceedings{goodfellow2015qualitatively,
  author    = {Goodfellow, Ian J. and Vinyals, Oriol and Saxe, Andrew M.},
  booktitle = {International Conference on Learning Representations},
  title     = {Qualitatively characterizing neural network optimization problems},
  year      = 2015
}
@inproceedings{han2015learning,
  author    = {Han, Song and Pool, Jeff and Tran, John and Dally, William},
  title     = {Learning both weights and connections for efficient neural network},
  booktitle = {Neural Information Processing Systems},
  volume    = {28},
  pages     = {1135--1143},
  year      = {2015},
}
@article{hinton2015distilling,
  author  = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff},
  journal = {arXiv:1503.02531},
  title   = {Distilling the knowledge in a neural network},
  year    = 2015
}
@article{Vallor-2014,
  author  = {Vallor, Shannon},
  title   = {Moral Deskilling and Upskilling in a New Machine Age: Reflections on the Ambiguous Future of Character},
  journal = {Philosophy \& Technology},
  volume  = {28},
  pages   = {107--124},
  year    = {2015},
}
@inproceedings{cordts2016cityscapes,
  author    = {Cordts, Marius and Omran, Mohamed and Ramos, Sebastian and Rehfeld, Timo and Enzweiler, Markus and Benenson, Rodrigo and Franke, Uwe and Roth, Stefan and Schiele, Bernt},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {3213--3223},
  title     = {The {C}ityscapes dataset for semantic urban scene understanding},
  year      = 2016
}
@book{goodfellow2016deep,
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  title     = {Deep learning},
  publisher = {MIT Press},
  year      = {2016},
}
@inproceedings{shang2016understanding,
  author    = {Shang, Wenling and Sohn, Kihyuk and Almeida, Diogo and Lee, Honglak},
  title     = {Understanding and improving convolutional neural networks via concatenated rectified linear units},
  booktitle = {International Conference on Machine Learning},
  pages     = {2217--2225},
  year      = {2016},
}
@article{hendrycks2016gaussian,
  author  = {Hendrycks, Dan and Gimpel, Kevin},
  title   = {Gaussian error linear units ({GELU}s)},
  journal = {arXiv:1606.08415},
  year    = {2016},
}
@article{arora2016understanding,
  author  = {Arora, Raman and Basu, Amitabh and Mianjy, Poorya and Mukherjee, Anirbit},
  title   = {Understanding deep neural networks with rectified linear units},
  journal = {arXiv:1611.01491},
  year    = {2016},
}
@inproceedings{telgarsky2016benefits,
  author    = {Telgarsky, Matus},
  title     = {Benefits of depth in neural networks},
  booktitle = {PMLR Conference on Learning Theory},
  pages     = {1517--1539},
  year      = {2016},
}
@inproceedings{eldan2015power,
  author    = {Eldan, Ronen and Shamir, Ohad},
  title     = {The power of depth for feedforward neural networks},
  booktitle = {PMLR Conference on Learning Theory},
  pages     = {907--940},
  year      = {2016},
}
@inproceedings{cohen2015on,
  author    = {Cohen, Nadav and Sharir, Or and Shashua, Amnon},
  title     = {On the expressive power of deep learning: A tensor analysis},
  booktitle = {PMLR Conference on Learning Theory},
  pages     = {698--728},
  year      = {2016},
}
@inproceedings{liang2016deep,
  author    = {Liang, Shiyu and Srikant, Rayadurgam},
  booktitle = {International Conference on Learning Representations},
  title     = {Why deep neural networks for function approximation?},
  year      = 2017
}
@inproceedings{burda2016importance,
  author    = {Burda, Yuri and Grosse, Roger B. and Salakhutdinov, Ruslan},
  title     = {Importance Weighted Autoencoders},
  booktitle = {International Conference on Learning Representations},
  year      = {2016},
}
@article{ruder2016overview,
  author  = {Ruder, Sebastian},
  title   = {An overview of gradient descent optimization algorithms},
  journal = {arXiv:1609.04747},
  year    = {2016},
}
@inproceedings{dozat2016incorporating,
  author    = {Dozat, Timothy},
  booktitle = {International Conference on Learning Representations --- Workshop track},
  title     = {Incorporating {N}esterov momentum into {A}dam},
  year      = 2016
}
@inproceedings{mishkin2016need,
  author    = {Mishkin, Dmytro and Matas, Jiri},
  title     = {All you need is a good init},
  booktitle = {International Conference on Learning Representations},
  year      = {2016},
}
@article{chen2016training,
  author  = {Chen, Tianqi and Xu, Bing and Zhang, Chiyuan and Guestrin, Carlos},
  title   = {Training deep nets with sublinear memory cost},
  journal = {arXiv:1604.06174},
  year    = {2016},
}
@inproceedings{zhang2016hogwild,
  author    = {Zhang, Huan and Hsieh, Cho-Jui and Akella, Venkatesh},
  booktitle = {IEEE International Conference on Data Mining},
  doi       = {10.1109/ICDM.2016.0074},
  pages     = {629--638},
  title     = {{HogWild++}: A New Mechanism for Decentralized Asynchronous Stochastic Gradient Descent},
  year      = 2016
}
@inproceedings{szegedy2016rethinking,
  author    = {Szegedy, Christian and Vanhoucke, Vincent and Ioffe, Sergey and Shlens, Jon and Wojna, Zbigniew},
  title     = {Rethinking the {I}nception architecture for computer vision},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {2818--2826},
  year      = {2016},
}
@inproceedings{gal2015dropout,
  author    = {Gal, Yarin and Ghahramani, Zoubin},
  booktitle = {International Conference on Machine Learning},
  pages     = {1050--1059},
  title     = {Dropout as a {B}ayesian Approximation: {R}epresenting Model Uncertainty in Deep Learning},
  year      = 2016
}
@inproceedings{xie2016disturblabel,
  author    = {Xie, Lingxi and Wang, Jingdong and Wei, Zhen and Wang, Meng and Tian, Qi},
  title     = {Disturb{L}abel: Regularizing {CNN} on the loss layer},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {4753--4762},
  year      = {2016},
}
@inproceedings{li2016preconditioned,
  author    = {Li, Chunyuan and Chen, Changyou and Carlson, David and Carin, Lawrence},
  title     = {Preconditioned stochastic gradient {L}angevin dynamics for deep neural networks},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {1788--1794},
  year      = {2016},
}
@inproceedings{pathak2016context,
  author    = {Pathak, Deepak and Kr{\"a}henb{\"u}hl, Philipp and Donahue, Jeff and Darrell, Trevor and Efros, Alexei A},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {2536--2544},
  title     = {Context encoders: Feature learning by inpainting},
  year      = 2016
}
@inproceedings{noroozi2016unsupervised,
  author    = {Noroozi, Mehdi and Favaro, Paolo},
  title     = {Unsupervised learning of visual representations by solving jigsaw puzzles},
  booktitle = {European Conference on Computer Vision},
  pages     = {69--84},
  year      = {2016},
}
@inproceedings{zhang2016colorful,
  author    = {Zhang, Richard and Isola, Phillip and Efros, Alexei A},
  title     = {Colorful image colorization},
  booktitle = {European Conference on Computer Vision},
  pages     = {649--666},
  year      = {2016},
}
@inproceedings{kipf2016variational,
  author    = {Kipf, Thomas N and Welling, Max},
  booktitle = {NIPS Bayesian Deep Learning Workshop},
  title     = {Variational graph auto-encoders},
  year      = 2016
}
@inproceedings{redmon2016you,
  author    = {Redmon, Joseph and Divvala, Santosh and Girshick, Ross and Farhadi, Ali},
  title     = {You only look once: Unified, real-time object detection},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {779--788},
  year      = {2016},
}
@article{dumoulin2016guide,
  author  = {Dumoulin, Vincent and Visin, Francesco},
  title   = {A guide to convolution arithmetic for deep learning},
  journal = {arXiv:1603.07285},
  year    = {2016},
}
@misc{odena2016deconvolution,
  author       = {Odena, Augustus and Dumoulin, Vincent and Olah, Chris},
  title        = {Deconvolution and checkerboard artifacts},
  journal      = {Distill},
  howpublished = {Distill, \url{https://distill.pub/2016/deconv-checkerboard/}},
  year         = {2016},
}
@inproceedings{shi2016real,
  author    = {Shi, Wenzhe and Caballero, Jose and Husz{\'a}r, Ferenc and Totz, Johannes and Aitken, Andrew P and Bishop, Rob and Rueckert, Daniel and Wang, Zehan},
  title     = {Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {1874--1883},
  year      = {2016},
}
@inproceedings{saha2016deep,
  author    = {Saha, Suman and Singh, Gurkirt and Sapienza, Michael and Torr, Philip HS and Cuzzolin, Fabio},
  booktitle = {British Machine Vision Conference},
  title     = {Deep learning for detecting multiple space-time action tubes in videos},
  year      = 2016
}
@inproceedings{cohen2016group,
  author    = {Cohen, Taco and Welling, Max},
  title     = {Group equivariant convolutional networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {2990--2999},
  year      = {2016},
}
@inproceedings{park2016analysis,
  author    = {Park, Sungheon and Kwak, Nojun},
  title     = {Analysis on the dropout effect in convolutional neural networks},
  booktitle = {Asian Conference on Computer Vision},
  pages     = {189--204},
  year      = {2016},
}
@inproceedings{jia2016dynamic,
  author    = {Jia, Xu and De Brabandere, Bert and Tuytelaars, Tinne and Van Gool, Luc},
  booktitle = {Neural Information Processing Systems},
  title     = {Dynamic filter networks},
  volume    = 29,
  year      = 2016
}
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {770--778},
  year      = {2016},
}
@inproceedings{newell2016stacked,
  author    = {Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
  title     = {Stacked hourglass networks for human pose estimation},
  booktitle = {European Conference on Computer Vision},
  pages     = {483--499},
  year      = {2016},
}
@inproceedings{he2016identity,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Identity mappings in deep residual networks},
  booktitle = {European Conference on Computer Vision},
  pages     = {630--645},
  year      = {2016},
}
@inproceedings{zagoruyko2016wide,
  author    = {Zagoruyko, Sergey and Komodakis, Nikos},
  booktitle = {British Machine Vision Conference},
  title     = {Wide residual networks},
  year      = 2016
}
@inproceedings{veit2016residual,
  author    = {Veit, Andreas and Wilber, Michael J and Belongie, Serge},
  booktitle = {Neural Information Processing Systems},
  pages     = {550--558},
  title     = {Residual networks behave like ensembles of relatively shallow networks},
  volume    = 29,
  year      = 2016
}
@article{yamada2016deep,
  author  = {Yamada, Yoshihiro and Iwamura, Masakazu and Kise, Koichi},
  title   = {Deep pyramidal residual networks with separated stochastic depth},
  journal = {arXiv:1612.01230},
  year    = {2016},
}
@inproceedings{huang2016deep,
  author    = {Huang, Gao and Sun, Yu and Liu, Zhuang and Sedra, Daniel and Weinberger, Kilian Q},
  title     = {Deep networks with stochastic depth},
  booktitle = {European Conference on Computer Vision},
  pages     = {646--661},
  year      = {2016},
}
@article{ba2016layer,
  author  = {Ba, Jimmy Lei and Kiros, Jamie Ryan and Hinton, Geoffrey E},
  title   = {Layer normalization},
  journal = {arXiv:1607.06450},
  year    = {2016},
}
@article{ulyanov2016instance,
  author  = {Ulyanov, Dmitry and Vedaldi, Andrea and Lempitsky, Victor},
  title   = {Instance normalization: The missing ingredient for fast stylization},
  journal = {arXiv:1607.08022},
  year    = {2016},
}
@inproceedings{salimans2016weight,
  author    = {Salimans, Tim and Kingma, Durk P},
  booktitle = {Neural Information Processing Systems},
  pages     = {901--909},
  title     = {Weight normalization: A simple reparameterization to accelerate training of deep neural networks},
  volume    = 29,
  year      = 2016
}
@inproceedings{cciccek20163d,
  author    = {{\c{C}}i{\c{c}}ek, {\"O}zg{\"u}n and Abdulkadir, Ahmed and Lienkamp, Soeren S and Brox, Thomas and Ronneberger, Olaf},
  title     = {3{D} {U}-{N}et: {L}earning dense volumetric segmentation from sparse annotation},
  booktitle = {International Conference on Medical Image Computing and Computer-Assisted Intervention},
  pages     = {424--432},
  year      = {2016},
}
@inproceedings{milletari2016v,
  author    = {Milletari, Fausto and Navab, Nassir and Ahmadi, Seyed-Ahmad},
  title     = {V-{N}et: Fully convolutional neural networks for volumetric medical image segmentation},
  booktitle = {International Conference on 3D Vision},
  pages     = {565--571},
  year      = {2016},
}
@inproceedings{garg2016unsupervised,
  author    = {Garg, Ravi and Bg, Vijay Kumar and Carneiro, Gustavo and Reid, Ian},
  title     = {Unsupervised {CNN} for single view depth estimation: {G}eometry to the rescue},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--756},
  year      = {2016},
}
@inproceedings{van2016pixel,
  author    = {Van den Oord, Aaron and Kalchbrenner, Nal and Kavukcuoglu, Koray},
  title     = {Pixel recurrent neural networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1747--1756},
  year      = {2016},
}
@inproceedings{rajpurkar2016squad,
  author    = {Rajpurkar, Pranav and Zhang, Jian and Lopyrev, Konstantin and Liang, Percy},
  booktitle = {Empirical Methods in Natural Language Processing},
  pages     = {2383--2392},
  title     = {S{Q}u{AD}: 100,000+ questions for machine comprehension of text},
  year      = 2016
}
@article{vijayakumar2016diverse,
  author  = {Vijayakumar, Ashwin K and Cogswell, Michael and Selvaraju, Ramprasath R and Sun, Qing and Lee, Stefan and Crandall, David and Batra, Dhruv},
  title   = {Diverse beam search: Decoding diverse solutions from neural sequence models},
  journal = {arXiv:1610.02424},
  year    = {2016},
}
@inproceedings{dai2016discriminative,
  author    = {Dai, Hanjun and Dai, Bo and Song, Le},
  title     = {Discriminative embeddings of latent variable models for structured data},
  booktitle = {International Conference on Machine Learning},
  pages     = {2702--2711},
  year      = {2016},
}
@inproceedings{li2015gated,
  author    = {Li, Yujia and Tarlow, Daniel and Brockschmidt, Marc and Zemel, Richard},
  booktitle = {International Conference on Learning Representations},
  title     = {Gated graph sequence neural networks},
  year      = 2016
}
@inproceedings{defferrard2016convolutional,
  author    = {Defferrard, Micha{\"e}l and Bresson, Xavier and Vandergheynst, Pierre},
  booktitle = {Neural Information Processing Systems},
  pages     = {3837--3845},
  title     = {Convolutional neural networks on graphs with fast localized spectral filtering},
  volume    = 29,
  year      = 2016
}
@inproceedings{atwood2016diffusion,
  author    = {Atwood, James and Towsley, Don},
  booktitle = {Neural Information Processing Systems},
  pages     = {1993--2001},
  title     = {Diffusion-convolutional neural networks},
  volume    = 29,
  year      = 2016
}
@article{kearnes2016molecular,
  author    = {Kearnes, Steven and McCloskey, Kevin and Berndl, Marc and Pande, Vijay and Riley, Patrick},
  journal   = {Journal of Computer-Aided Molecular Design},
  number    = 8,
  pages     = {595--608},
  publisher = {Springer},
  title     = {Molecular graph convolutions: {M}oving beyond fingerprints},
  volume    = 30,
  year      = 2016
}
@inproceedings{boscaini2016learning,
  author    = {Boscaini, Davide and Masci, Jonathan and Rodol{\`a}, Emanuele and Bronstein, Michael},
  booktitle = {Neural Information Processing Systems},
  pages     = {3189--3197},
  title     = {Learning shape correspondence with anisotropic convolutional neural networks},
  volume    = 29,
  year      = 2016
}
@inproceedings{van2016conditional,
  author    = {Van den Oord, Aaron and Kalchbrenner, Nal and Espeholt, Lasse and Vinyals, Oriol and Graves, Alex and others},
  booktitle = {Neural Information Processing Systems},
  pages     = {4790--4798},
  title     = {Conditional image generation with {P}ixel{CNN} decoders},
  volume    = 29,
  year      = 2016
}
@inproceedings{salimans2016improved,
  author    = {Salimans, Tim and Goodfellow, Ian and Zaremba, Wojciech and Cheung, Vicki and Radford, Alec and Chen, Xi},
  booktitle = {Neural Information Processing Systems},
  pages     = {2226--2234},
  title     = {Improved techniques for training {GAN}s},
  volume    = 29,
  year      = 2016
}
@inproceedings{chen2016infogan,
  author    = {Chen, Xi and Duan, Yan and Houthooft, Rein and Schulman, John and Sutskever, Ilya and Abbeel, Pieter},
  booktitle = {Neural Information Processing Systems},
  pages     = {2172--2180},
  title     = {Info{GAN}: Interpretable representation learning by information maximizing generative adversarial nets},
  volume    = 29,
  year      = 2016
}
@article{goodfellow2016nips,
  author  = {Goodfellow, Ian},
  title   = {Generative adversarial networks},
  journal = {NIPS 2016 Tutorial},
  year    = {2016},
}
@inproceedings{mogren2016c,
  author    = {Mogren, Olof},
  booktitle = {NIPS 2016 Constructive Machine Learning Workshop},
  title     = {C-{RNN-GAN}: Continuous recurrent neural networks with adversarial training},
  year      = 2016
}
@inproceedings{wu2016learning,
  author    = {Wu, Jiajun and Zhang, Chengkai and Xue, Tianfan and Freeman, Bill and Tenenbaum, Josh},
  booktitle = {Neural Information Processing Systems},
  pages     = {82--90},
  title     = {Learning a probabilistic latent space of object shapes via 3{D} generative-adversarial modeling},
  volume    = 29,
  year      = 2016
}
@inproceedings{vondrick2016generating,
  author    = {Vondrick, Carl and Pirsiavash, Hamed and Torralba, Antonio},
  booktitle = {Neural Information Processing Systems},
  pages     = {613--621},
  title     = {Generating videos with scene dynamics},
  volume    = 29,
  year      = 2016
}
@inproceedings{nowozin2016f,
  author    = {Nowozin, Sebastian and Cseke, Botond and Tomioka, Ryota},
  booktitle = {Neural Information Processing Systems},
  pages     = {271--279},
  title     = {f-{GAN}: Training generative neural samplers using variational divergence minimization},
  volume    = 29,
  year      = 2016
}
@inproceedings{gulrajani2016pixelvae,
  author    = {Gulrajani, Ishaan and Kumar, Kundan and Ahmed, Faruk and Taiga, Adrien Ali and Visin, Francesco and Vazquez, David and Courville, Aaron},
  booktitle = {International Conference on Learning Representations},
  title     = {Pixel{VAE}: A latent variable model for natural images},
  year      = 2017
}
@inproceedings{reed2016generative,
  author    = {Reed, Scott and Akata, Zeynep and Yan, Xinchen and Logeswaran, Lajanugen and Schiele, Bernt and Lee, Honglak},
  title     = {Generative adversarial text to image synthesis},
  booktitle = {International Conference on Machine Learning},
  pages     = {1060--1069},
  year      = {2016},
}
@inproceedings{perarnau2016invertible,
  author    = {Perarnau, Guim and Van De Weijer, Joost and Raducanu, Bogdan and {\'A}lvarez, Jose M},
  booktitle = {NIPS 2016 Workshop on Adversarial Training},
  title     = {Invertible conditional {GAN}s for image editing},
  year      = 2016
}
@inproceedings{yan2016attribute2image,
  author    = {Yan, Xinchen and Yang, Jimei and Sohn, Kihyuk and Lee, Honglak},
  title     = {Attribute2{I}mage: Conditional image generation from visual attributes},
  booktitle = {European Conference on Computer Vision},
  pages     = {776--791},
  year      = {2016},
}
@inproceedings{reed2016learning,
  author    = {Reed, Scott E and Akata, Zeynep and Mohan, Santosh and Tenka, Samuel and Schiele, Bernt and Lee, Honglak},
  booktitle = {Neural Information Processing Systems},
  pages     = {217--225},
  title     = {Learning what and where to draw},
  volume    = 29,
  year      = 2016
}
@inproceedings{zhou2016learning,
  author    = {Zhou, Tinghui and Kr{\"a}henb{\"u}hl, Philipp and Aubry, Mathieu and Huang, Qixing and Efros, Alexei A},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {117--126},
  title     = {Learning dense correspondence via 3{D}-guided cycle consistency},
  year      = 2016
}
@inproceedings{zhu2018generative,
  author    = {Zhu, Jun{-}Yan and Kr{\"{a}}henb{\"{u}}hl, Philipp and Shechtman, Eli and Efros, Alexei A.},
  title     = {Generative Visual Manipulation on the Natural Image Manifold},
  booktitle = {European Conference on Computer Vision},
  pages     = {597--613},
  year      = {2016},
}
@inproceedings{theis2015note,
  author    = {Theis, Lucas and van den Oord, A{\"a}ron and Bethge, Matthias},
  booktitle = {International Conference on Learning Representations},
  title     = {A note on the evaluation of generative models},
  year      = 2016
}
@article{dinh2016density,
  author  = {Dinh, Laurent and Sohl-Dickstein, Jascha and Bengio, Samy},
  journal = {International Conference on Learning Representations},
  title   = {Density estimation using {R}eal {NVP}},
  year    = 2016
}
@article{tomczak2016improving,
  author  = {Tomczak, Jakub M and Welling, Max},
  journal = {NIPS Workshop on Bayesian Deep Learning},
  title   = {Improving variational auto-encoders using {H}ouseholder flow},
  year    = 2016
}
@article{kingma2016improved,
  author  = {Kingma, Durk P and Salimans, Tim and Jozefowicz, Rafal and Chen, Xi and Sutskever, Ilya and Welling, Max},
  journal = {Neural Information Processing Systems},
  pages   = {4736--4744},
  title   = {Improved variational inference with inverse autoregressive flow},
  volume  = 29,
  year    = 2016
}
@article{oord2016wavenet,
  author  = {Van den Oord, Aaron and Dieleman, Sander and Zen, Heiga and Simonyan, Karen and Vinyals, Oriol and Graves, Alex and Kalchbrenner, Nal and Senior, Andrew and Kavukcuoglu, Koray},
  journal = {ISCA Speech Synthesis Workshop},
  title   = {Wave{N}et: A generative model for raw audio},
  year    = 2016
}
@article{gemici2016normalizing,
  author  = {Gemici, Mevlana C and Rezende, Danilo and Mohamed, Shakir},
  journal = {NIPS Workshop on Bayesian Deep Learning},
  title   = {Normalizing flows on {R}iemannian manifolds},
  year    = 2016
}
@article{white2016sampling,
  author  = {White, Tom},
  journal = {arXiv:1609.04468},
  title   = {Sampling generative networks},
  year    = 2016
}
@article{gregor2016towards,
  author  = {Gregor, Karol and Besse, Frederic and Jimenez Rezende, Danilo and Danihelka, Ivo and Wierstra, Daan},
  journal = {Neural Information Processing Systems},
  pages   = {3549--3557},
  title   = {Towards conceptual compression},
  volume  = 29,
  year    = 2016
}
@inproceedings{van2016stable,
  author       = {Van Hoof, Herke and Chen, Nutan and Karl, Maximilian and van der Smagt, Patrick and Peters, Jan},
  booktitle    = {IEEE/RSJ International Conference on Intelligent Robots and Systems},
  organization = {IEEE},
  pages        = {3928--3934},
  title        = {Stable reinforcement learning with autoencoders for tactile and visual data},
  year         = 2016
}
@article{eslami2016attend,
  author  = {Eslami, S. M. Ali and Heess, Nicolas and Weber, Theophane and Tassa, Yuval and Szepesvari, David and Kavukcuoglu, Koray and Hinton, Geoffrey E},
  journal = {Neural Information Processing Systems},
  pages   = {3225--3233},
  title   = {Attend, infer, repeat: Fast scene understanding with generative models},
  volume  = 29,
  year    = 2016
}
@article{jimenez2016unsupervised,
  author  = {Jimenez Rezende, Danilo and Eslami, S. M. Ali and Mohamed, Shakir and Battaglia, Peter and Jaderberg, Max and Heess, Nicolas},
  journal = {Neural Information Processing Systems},
  pages   = {4997--5005},
  title   = {Unsupervised learning of 3{D} structure from images},
  volume  = 29,
  year    = 2016
}
@article{sonderby2016train,
  author  = {S{\o}nderby, Casper Kaae and Raiko, Tapani and Maal{\o}e, Lars and S{\o}nderby, S{\o}ren Kaae and Winther, Ole},
  journal = {arXiv:1602.02282},
  title   = {How to train deep variational autoencoders and probabilistic ladder networks},
  year    = 2016
}
@article{lamb2016discriminative,
  author  = {Lamb, Alex and Dumoulin, Vincent and Courville, Aaron},
  journal = {arXiv:1602.03220},
  title   = {Discriminative regularization for generative models},
  year    = 2016
}
@inproceedings{larsen2016autoencoding,
  author    = {Larsen, Anders Boesen Lindbo and S{\o}nderby, S{\o}ren Kaae and Larochelle, Hugo and Winther, Ole},
  booktitle = {International Conference on Machine Learning},
  pages     = {1558--1566},
  title     = {Autoencoding beyond pixels using a learned similarity metric},
  year      = 2016
}
@inproceedings{maaloe2016auxiliary,
  author    = {Maal{\o}e, Lars and S{\o}nderby, Casper Kaae and S{\o}nderby, S{\o}ren Kaae and Winther, Ole},
  booktitle = {International Conference on Machine Learning},
  pages     = {1445--1453},
  title     = {Auxiliary deep generative models},
  year      = 2016
}
@article{jiang2016variational,
  author  = {Jiang, Zhuxi and Zheng, Yin and Tan, Huachun and Tang, Bangsheng and Zhou, Hanning},
  journal = {International Joint Conference on Artificial Intelligence},
  pages   = {1965--1972},
  title   = {Variational deep embedding: An unsupervised and generative approach to clustering},
  year    = 2016
}
@inproceedings{ranganath2016hierarchical,
  author    = {Ranganath, Rajesh and Tran, Dustin and Blei, David},
  booktitle = {International Conference on Machine Learning},
  pages     = {324--333},
  title     = {Hierarchical variational models},
  year      = 2016
}
@article{brock2016neural,
  author  = {Brock, Andrew and Lim, Theodore and Ritchie, James M and Weston, Nick},
  journal = {International Conference on Learning Representations},
  title   = {Neural photo editing with introspective adversarial networks},
  year    = 2016
}
@inproceedings{hoffman2016elbo,
  author    = {Hoffman, Matthew D and Johnson, Matthew J},
  booktitle = {NIPS Workshop in Advances in Approximate Bayesian Inference},
  title     = {{ELBO} surgery: {Y}et another way to carve up the variational evidence lower bound},
  year      = 2016
}
@article{li2016renyi,
  author  = {Li, Yingzhen and Turner, Richard E},
  journal = {Neural Information Processing Systems},
  pages   = {1073--1081},
  title   = {R{\'e}nyi divergence variational inference},
  volume  = 29,
  year    = 2016
}
@inproceedings{bornschein2016bidirectional,
  author    = {Bornschein, J{\"o}rg and Shabanian, Samira and Fischer, Asja and Bengio, Yoshua},
  booktitle = {International Conference on Machine Learning},
  pages     = {2511--2519},
  title     = {Bidirectional {H}elmholtz machines},
  year      = 2016
}
@article{sonderby2016ladder,
  author  = {S{\o}nderby, Casper Kaae and Raiko, Tapani and Maal{\o}e, Lars and S{\o}nderby, S{\o}ren Kaae and Winther, Ole},
  journal = {Neural Information Processing Systems},
  pages   = {3738--3746},
  title   = {Ladder variational autoencoders},
  volume  = 29,
  year    = 2016
}
@article{silver2016mastering,
  author    = {Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and Van Den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others},
  journal   = {Nature},
  number    = 7587,
  pages     = {484--489},
  publisher = {Nature Publishing Group},
  title     = {Mastering the game of {G}o with deep neural networks and tree search},
  volume    = 529,
  year      = 2016
}
@article{schaul2015prioritized,
  author  = {Schaul, Tom and Quan, John and Antonoglou, Ioannis and Silver, David},
  journal = {International Conference on Learning Representations},
  title   = {Prioritized experience replay},
  year    = 2016
}
@inproceedings{van2016deep,
  author    = {Van Hasselt, Hado and Guez, Arthur and Silver, David},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {2094--2100},
  title     = {Deep reinforcement learning with double {Q}-learning},
  year      = 2016
}
@inproceedings{wang2016dueling,
  author    = {Wang, Ziyu and Schaul, Tom and Hessel, Matteo and van Hasselt, Hado and Lanctot, Marc and de Freitas, Nando},
  booktitle = {International Conference on Machine Learning},
  pages     = {1995--2003},
  title     = {Dueling network architectures for deep reinforcement learning},
  year      = 2016
}
@article{lillicrap2015continuous,
  author  = {Lillicrap, Timothy P and Hunt, Jonathan J and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  journal = {International Conference on Learning Representations},
  title   = {Continuous control with deep reinforcement learning},
  year    = 2016
}
@article{schulman2015high,
  author  = {Schulman, John and Moritz, Philipp and Levine, Sergey and Jordan, Michael and Abbeel, Pieter},
  journal = {International Conference on Learning Representations},
  title   = {High-dimensional continuous control using generalized advantage estimation},
  year    = 2016
}
@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  booktitle = {International Conference on Machine Learning},
  pages     = {1928--1937},
  title     = {Asynchronous methods for deep reinforcement learning},
  year      = 2016
}
@article{han2015deep,
  author  = {Han, Song and Mao, Huizi and Dally, William J},
  journal = {International Conference on Learning Representations},
  title   = {Deep compression: Compressing deep neural networks with pruning, trained quantization and {H}uffman coding},
  year    = 2016
}
@inproceedings{zhou2016less,
  author    = {Zhou, Hao and Alvarez, Jose M and Porikli, Fatih},
  booktitle = {European Conference on Computer Vision},
  pages     = {662--677},
  title     = {Less is more: Towards compact {CNN}s},
  year      = 2016
}
@article{alvarez2016learning,
  author  = {Alvarez, Jose M and Salzmann, Mathieu},
  journal = {Neural Information Processing Systems},
  pages   = {2262--2270},
  title   = {Learning the number of neurons in deep networks},
  volume  = 29,
  year    = 2016
}
@misc{Angwin-et-al-2016,
  author       = {Julia Angwin and Jeff Larson and Surya Mattu and Lauren Kirchner},
  howpublished = {ProPublica, May 23, 2016. \url{https://www.propublica.org/article/machine-bias-risk-assessments-in-criminal-sentencing}},
  title        = {Machine bias: There's software used across the country to predict future criminals. And it's biased against blacks},
  year         = 2016
}
@book{ONeil-2016,
  author    = {Cathy O'Neil},
  publisher = {Crown},
  title     = {Weapons of Math Destruction},
  year      = 2016
}
@article{Amodei-et-al-2016,
  author  = {Dario Amodei and Chris Olah and Jacob Steinhardt and Paul Christiano and John Schulman and Dan Man{\'e}},
  journal = {arXiv:1606.06565},
  title   = {Concrete Problems in {AI} Safety},
  year    = 2016
}
@inproceedings{Ribeiro-et-al-2016,
  author    = {Ribeiro, Marco and Singh, Sameer and Guestrin, Carlos},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {97--101},
  title     = {``{W}hy Should {I} Trust You?{''}: Explaining the Predictions of Any Classifier},
  year      = 2016
}
@article{Wu-Zhang-2016,
  author  = {Xiaolin Wu and Xi Zhang},
  journal = {arXiv:1611.04135},
  title   = {Automated Inference on Criminality using Face Images},
  year    = 2016
}
@book{Brynjolfsson-McAfee-2016,
  author    = {Erik Brynjolfsson and Andrew McAfee},
  publisher = {W. W. Norton},
  title     = {The Second Machine Age: Work, Progress, and Prosperity in a Time of Brilliant Technologies},
  year      = 2016
}
@article{Metcalf-et-al-2016,
  author  = {Jacob Metcalf and Emily F. Keller and Danah Boyd},
  journal = {Council for Big Data, Ethics, and Society},
  note    = {\url{https://bdes.datasociety.net/council-output/perspectives-on-big-data-ethics-and-society/}},
  title   = {Perspectives on Big Data, Ethics, and Society},
  year    = 2016
}
@article{Ahmed-Wahed-2020,
  author  = {Nur Ahmed and Muntasir Wahed},
  journal = {arXiv:2010.15581},
  title   = {The De-democratization of {AI}: Deep Learning and the Compute Divide in Artificial Intelligence Research},
  year    = 2020
}
@book{Tetlock-Gardner-2016,
  address   = {Toronto},
  author    = {Philip E. Tetlock and Dan Gardner},
  publisher = {Signal, McClelland \& Stewart},
  title     = {Superforecasting: The Art and Science of Prediction},
  year      = 2016
}
@inproceedings{klambauer2017self,
  author    = {Klambauer, G{\"u}nter and Unterthiner, Thomas and Mayr, Andreas and Hochreiter, Sepp},
  booktitle = {Neural Information Processing Systems},
  pages     = {972--981},
  title     = {Self-normalizing neural networks},
  volume    = 30,
  year      = 2017
}
@article{ramachandran2017searching,
  author  = {Ramachandran, Prajit and Zoph, Barret and Le, Quoc V},
  journal = {arXiv:1710.05941},
  title   = {Searching for activation functions},
  year    = 2017
}
@inproceedings{montufar2017notes,
  author    = {Mont{\'u}far, Guido},
  booktitle = {International Conference on Sampling Theory and Applications},
  month     = mar,
  title     = {Notes on the number of linear regions of deep neural networks},
  year      = 2017
}
@article{lu2017expressive,
  author  = {Lu, Zhou and Pu, Hongming and Wang, Feicheng and Hu, Zhiqiang and Wang, Liwei},
  journal = {Neural Information Processing Systems},
  pages   = {6231--6239},
  title   = {The expressive power of neural networks: A view from the width},
  volume  = 30,
  year    = 2017
}
@inproceedings{safran2016depth,
  author    = {Safran, Itay and Shamir, Ohad},
  booktitle = {International Conference on Machine Learning},
  pages     = {2979--2987},
  title     = {Depth-width tradeoffs in approximating natural functions with neural networks},
  year      = 2017
}
@article{poggio2016why,
  author    = {Poggio, Tomaso and Mhaskar, Hrushikesh and Rosasco, Lorenzo and Miranda, Brando and Liao, Qianli},
  journal   = {International Journal of Automation and Computing},
  number    = 5,
  pages     = {503--519},
  publisher = {Springer},
  title     = {Why and when can deep-but not shallow-networks avoid the curse of dimensionality: {A} review},
  volume    = 14,
  year      = 2017
}
@inproceedings{lin2017focal,
  author    = {Lin, Tsung-Yi and Goyal, Priya and Girshick, Ross and He, Kaiming and Doll{\'a}r, Piotr},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {2980--2988},
  title     = {Focal loss for dense object detection},
  year      = 2017
}
@inproceedings{ng2017predicting,
  author    = {Ng, Nathan H and Gabriel, Rodney A and McAuley, Julian and Elkan, Charles and Lipton, Zachary C},
  booktitle = {PMLR Machine Learning for Healthcare Conference},
  pages     = {100--111},
  title     = {Predicting surgery duration with neural heteroscedastic regression},
  year      = 2017
}
@misc{goh2017why,
  author       = {Goh, Gabriel},
  doi          = {10.23915/distill.00006},
  howpublished = {Distill, \url{http://distill.pub/2017/momentum}},
  journal      = {Distill},
  title        = {Why Momentum Really Works},
  year         = 2017
}
@article{wilson2017marginal,
  author  = {Wilson, Ashia C and Roelofs, Rebecca and Stern, Mitchell and Srebro, Nati and Recht, Benjamin},
  journal = {Neural Information Processing Systems},
  pages   = {4148--4158},
  title   = {The marginal value of adaptive gradient methods in machine learning},
  volume  = 30,
  year    = 2017
}
@article{keskar2017improving,
  author  = {Keskar, Nitish Shirish and Socher, Richard},
  journal = {arXiv:1712.07628},
  title   = {Improving Generalization Performance by Switching from {A}dam to {SGD}},
  year    = 2017
}
@article{gomez2017reversible,
  author  = {Gomez, Aidan N and Ren, Mengye and Urtasun, Raquel and Grosse, Roger B},
  journal = {Neural Information Processing Systems},
  pages   = {2214--2224},
  title   = {The reversible residual network: {B}ackpropagation without storing activations},
  volume  = 30,
  year    = 2017
}
@article{neyshabur2017exploring,
  author  = {Neyshabur, Behnam and Bhojanapalli, Srinadh and McAllester, David and Srebro, Nati},
  journal = {Neural Information Processing Systems},
  pages   = {5947--5956},
  title   = {Exploring generalization in deep learning},
  volume  = 30,
  year    = 2017
}
@article{li2016hyperband,
  author  = {Li, Lisha and Jamieson, Kevin and DeSalvo, Giulia and Rostamizadeh, Afshin and Talwalkar, Ameet},
  journal = {Journal of Machine Learning Research},
  number  = 1,
  pages   = {6765--6816},
  title   = {Hyperband: {A} novel bandit-based approach to hyperparameter optimization},
  volume  = 18,
  year    = 2017
}
@article{kukavcka2017regularization,
  author  = {Kuka{\v{c}}ka, Jan and Golkov, Vladimir and Cremers, Daniel},
  journal = {arXiv:1710.10686},
  title   = {Regularization for deep learning: A taxonomy},
  year    = 2017
}
@article{zhang2016understanding,
  author  = {Chiyuan Zhang and Samy Bengio and Moritz Hardt and Benjamin Recht and Oriol Vinyals},
  journal = {International Conference on Learning Representations},
  title   = {Understanding deep learning requires rethinking generalization},
  year    = 2017
}
@inproceedings{bartlett2017spectrally,
  author    = {Bartlett, Peter L and Foster, Dylan J and Telgarsky, Matus J},
  booktitle = {Neural Information Processing Systems},
  pages     = {6240--6249},
  title     = {Spectrally-normalized margin bounds for neural networks},
  volume    = 30,
  year      = 2017
}
@article{yoshida2017spectral,
  author  = {Yoshida, Yuichi and Miyato, Takeru},
  journal = {arXiv:1705.10941},
  title   = {Spectral norm regularization for improving the generalizability of deep learning},
  year    = 2017
}
@article{lakshminarayanan2017simple,
  author  = {Lakshminarayanan, Balaji and Pritzel, Alexander and Blundell, Charles},
  journal = {Neural Information Processing Systems},
  pages   = {6402--6413},
  title   = {Simple and scalable predictive uncertainty estimation using deep ensembles},
  volume  = 30,
  year    = 2017
}
@article{huang2017snapshot,
  author  = {Huang, Gao and Li, Yixuan and Pleiss, Geoff and Liu, Zhuang and Hopcroft, John E and Weinberger, Kilian Q},
  journal = {International Conference on Learning Representations},
  title   = {Snapshot ensembles: Train 1, get {M} for free},
  year    = 2017
}
@article{kendall2017uncertainties,
  author  = {Kendall, Alex and Gal, Yarin},
  journal = {Neural Information Processing Systems},
  pages   = {5574--5584},
  title   = {What uncertainties do we need in {B}ayesian deep learning for computer vision?},
  volume  = 30,
  year    = 2017
}
@article{shen2017continuous,
  author  = {Shen, Xu and Tian, Xinmei and Liu, Tongliang and Xu, Fang and Tao, Dacheng},
  journal = {IEEE Transactions on Neural Networks and Learning Systems},
  number  = 9,
  pages   = {3926--3937},
  title   = {Continuous dropout},
  volume  = 29,
  year    = 2017
}
@article{devries2017dataset,
  author  = {DeVries, Terrance and Taylor, Graham W},
  journal = {arXiv:1702.05538},
  title   = {Dataset augmentation in feature space},
  year    = 2017
}
@inproceedings{chorowski2016towards,
  author    = {Jan Chorowski and Navdeep Jaitly},
  booktitle = {INTERSPEECH},
  pages     = {523--527},
  title     = {Towards Better Decoding and Language Model Integration in Sequence to Sequence Models},
  year      = 2017
}
@article{vaswani2017attention,
  author  = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
  journal = {Neural Information Processing Systems},
  pages   = {5998--6008},
  title   = {Attention is all you need},
  volume  = 30,
  year    = 2017
}
@article{pererya2017regularizing,
  author  = {Pereyra, Gabriel and Tucker, George and Chorowski, Jan and Kaiser, {\L}ukasz and Hinton, Geoffrey},
  doi     = {10.48550/ARXIV.1701.06548},
  journal = {International Conference on Learning Representations Workshop},
  title   = {Regularizing neural networks by penalizing confident output distributions},
  year    = 2017
}
@article{keskar2016on,
  author  = {Nitish Shirish Keskar and Dheevatsa Mudigere and Jorge Nocedal and Mikhail Smelyanskiy and Ping Tak Peter Tang},
  journal = {International Conference on Learning Representations},
  title   = {On Large-Batch Training for Deep Learning: Generalization Gap and Sharp Minima},
  year    = 2017
}
@article{kang2017patchshuffle,
  author  = {Kang, Guoliang and Dong, Xuanyi and Zheng, Liang and Yang, Yi},
  journal = {arXiv:1707.07103},
  title   = {Patch{S}huffle regularization},
  year    = 2017
}
@inproceedings{calimeri2017biomedical,
  author    = {Calimeri, Francesco and Marzullo, Aldo and Stamile, Claudio and Terracina, Giorgio},
  booktitle = {International Conference on Artificial Neural Networks},
  pages     = {626--634},
  title     = {Biomedical data augmentation using adversarial neural networks},
  year      = 2017
}
@inproceedings{abesser2017acoustic,
  author    = {Abe{\ss}er, Jakob and Mimilakis, Stylianos Ioannis and Gr{\"a}fe, Robert and Lukashevich, Hanna},
  booktitle = {Workshop on Detection and Classification of Acoustic Scenes and Events},
  pages     = {7--11},
  title     = {Acoustic scene classification by combining autoencoder-based dimensionality reduction and convolutional neural networks},
  year      = 2017
}
@article{salamon2017deep,
  author  = {Salamon, Justin and Bello, Juan Pablo},
  journal = {IEEE Signal Processing Letters},
  number  = 3,
  pages   = {279--283},
  title   = {Deep convolutional neural networks and data augmentation for environmental sound classification},
  volume  = 24,
  year    = 2017
}
@article{zhang2017mixup,
  author  = {Zhang, Hongyi and Cisse, Moustapha and Dauphin, Yann N and Lopez-Paz, David},
  journal = {International Conference on Learning Representations},
  title   = {mixup: Beyond empirical risk minimization},
  year    = 2017
}
@inproceedings{mun2017deep,
  author    = {Mun, Seongkyu and Shon, Suwon and Kim, Wooil and Han, David K. and Ko, Hanseok},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  doi       = {10.1109/ICASSP.2017.7952265},
  pages     = {796--800},
  title     = {Deep Neural Network based learning and transferring mid-level audio features for acoustic scene classification},
  year      = 2017
}
@inproceedings{li2017robust,
  author    = {Li, Yitong and Cohn, Trevor and Baldwin, Timothy},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {21--27},
  title     = {Robust training under linguistic adversity},
  year      = 2017
}
@inproceedings{szegedy2017inception,
  author    = {Szegedy, Christian and Ioffe, Sergey and Vanhoucke, Vincent and Alemi, Alexander A},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {4278--4284},
  title     = {Inception-v4, {I}nception-{R}esnet and the impact of residual connections on learning},
  year      = 2017
}
@article{rawat2017deep,
  author    = {Rawat, Waseem and Wang, Zenghui},
  journal   = {Neural Computation},
  number    = 9,
  pages     = {2352--2449},
  publisher = {MIT Press},
  title     = {Deep convolutional neural networks for image classification: A comprehensive review},
  volume    = 29,
  year      = 2017
}
@article{howard2017mobilenets,
  author  = {Howard, Andrew G and Zhu, Menglong and Chen, Bo and Kalenichenko, Dmitry and Wang, Weijun and Weyand, Tobias and Andreetto, Marco and Adam, Hartwig},
  journal = {arXiv:1704.04861},
  title   = {Mobile{N}ets: Efficient convolutional neural networks for mobile vision applications},
  year    = 2017
}
@inproceedings{xie2017aggregated,
  author    = {Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {1492--1500},
  title     = {Aggregated residual transformations for deep neural networks},
  year      = 2017
}
@inproceedings{conneau2016very,
  author    = {Conneau, Alexis and Schwenk, Holger and Barrault, Lo{\"\i}c and LeCun, Yann},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {1107--1116},
  title     = {Very Deep Convolutional Networks for Text Classification},
  year      = 2017
}
@inproceedings{worrall2017harmonic,
  author    = {Worrall, Daniel E and Garbin, Stephan J and Turmukhambetov, Daniyar and Brostow, Gabriel J},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {5028--5037},
  title     = {Harmonic networks: Deep translation and rotation equivariance},
  year      = 2017
}
@article{devries2017improved,
  author  = {DeVries, Terrance and Taylor, Graham W},
  journal = {arXiv:1708.04552},
  title   = {Improved regularization of convolutional neural networks with {C}utout},
  year    = 2017
}
@inproceedings{dai2017deformable,
  author    = {Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {764--773},
  title     = {Deformable convolutional networks},
  year      = 2017
}
@inproceedings{lin2017feature,
  author    = {Lin, Tsung-Yi and Doll{\'a}r, Piotr and Girshick, Ross and He, Kaiming and Hariharan, Bharath and Belongie, Serge},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {2117--2125},
  title     = {Feature pyramid networks for object detection},
  year      = 2017
}
@inproceedings{bau2017network,
  author    = {Bau, David and Zhou, Bolei and Khosla, Aditya and Oliva, Aude and Torralba, Antonio},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {6541--6549},
  title     = {Network dissection: Quantifying interpretability of deep visual representations},
  year      = 2017
}
@inproceedings{balduzzi2017shattered,
  author    = {Balduzzi, David and Frean, Marcus and Leary, Lennox and Lewis, J. P. and Ma, Kurt Wan-Duo and McWilliams, Brian},
  booktitle = {International Conference on Machine Learning},
  pages     = {342--350},
  title     = {The shattered gradients problem: If {R}es{N}ets are the answer, then what is the question?},
  year      = 2017
}
@article{orhan2017skip,
  author  = {Orhan, A Emin and Pitkow, Xaq},
  journal = {International Conference on Learning Representations},
  title   = {Skip connections eliminate singularities},
  year    = 2017
}
@article{gastaldi2017shake,
  author  = {Gastaldi, Xavier},
  journal = {arXiv:1705.07485},
  title   = {Shake-shake regularization},
  year    = 2017
}
@article{gastaldi2017shake2,
  author  = {Gastaldi, Xavier},
  journal = {International Conference on Learning Representations Workshop},
  title   = {Shake-shake regularization of 3-branch residual networks},
  year    = 2017
}
@article{hoffer2017train,
  author  = {Hoffer, Elad and Hubara, Itay and Soudry, Daniel},
  journal = {Neural Information Processing Systems},
  pages   = {1731--1741},
  title   = {Train longer, generalize better: Closing the generalization gap in large batch training of neural networks},
  volume  = 30,
  year    = 2017
}
@article{ioffe2017batch,
  author  = {Ioffe, Sergey},
  journal = {Neural Information Processing Systems},
  pages   = {1945--1953},
  title   = {Batch renormalization: Towards reducing minibatch dependence in batch-normalized models},
  volume  = 30,
  year    = 2017
}
@inproceedings{huang2017densely,
  author    = {Huang, Gao and Liu, Zhuang and Van Der Maaten, Laurens and Weinberger, Kilian Q},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {4700--4708},
  title     = {Densely connected convolutional networks},
  year      = 2017
}
@inproceedings{isola2017image,
  author    = {Isola, Phillip and Zhu, Jun-Yan and Zhou, Tinghui and Efros, Alexei A},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {1125--1134},
  title     = {Image-to-image translation with conditional adversarial networks},
  year      = 2017
}
@article{daniluk2017frustratingly,
  author  = {Daniluk, Micha{\l} and Rockt{\"a}schel, Tim and Welbl, Johannes and Riedel, Sebastian},
  journal = {International Conference on Learning Representations},
  title   = {Frustratingly short attention spans in neural language modeling},
  year    = 2017
}
@article{ha2016hypernetworks,
  author  = {Ha, David and Dai, Andrew and Le, Quoc V},
  journal = {International Conference on Learning Representations},
  title   = {Hypernetworks},
  year    = 2017
}
@article{sabour2017dynamic,
  author  = {Sabour, Sara and Frosst, Nicholas and Hinton, Geoffrey E},
  journal = {Neural Information Processing Systems},
  pages   = {3856--3866},
  title   = {Dynamic routing between capsules},
  volume  = 30,
  year    = 2017
}
@inproceedings{sun2017revisiting,
  author    = {Sun, Chen and Shrivastava, Abhinav and Singh, Saurabh and Gupta, Abhinav},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {843--852},
  title     = {Revisiting unreasonable effectiveness of data in deep learning era},
  year      = 2017
}
@article{hamilton2017inductive,
  author  = {Hamilton, Will and Ying, Zhitao and Leskovec, Jure},
  journal = {Neural Information Processing Systems},
  pages   = {1024--1034},
  title   = {Inductive representation learning on large graphs},
  volume  = 30,
  year    = 2017
}
@article{kipf2016semi,
  author  = {Kipf, Thomas N and Welling, Max},
  journal = {International Conference on Learning Representations},
  title   = {Semi-supervised classification with graph convolutional networks},
  year    = 2017
}
@inproceedings{gilmer2017neural,
  author    = {Gilmer, Justin and Schoenholz, Samuel S and Riley, Patrick F and Vinyals, Oriol and Dahl, George E},
  booktitle = {International Conference on Machine Learning},
  pages     = {1263--1272},
  title     = {Neural message passing for quantum chemistry},
  year      = 2017
}
@inproceedings{monti2017geometric,
  author    = {Monti, Federico and Boscaini, Davide and Masci, Jonathan and Rodol{\`a}, Emanuele and Svoboda, Jan and Bronstein, Michael M},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {5115--5124},
  title     = {Geometric deep learning on graphs and manifolds using mixture model {CNN}s},
  year      = 2017
}
@inproceedings{pham2017column,
  author    = {Pham, Trang and Tran, Truyen and Phung, Dinh and Venkatesh, Svetha},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {2485--2491},
  title     = {Column networks for collective classification},
  year      = 2017
}
@article{zhou2017graph,
  author  = {Zhou, Zhenpeng and Li, Xiaocheng},
  journal = {arXiv:1706.09916},
  title   = {Graph convolution: {A} high-order and adaptive approach},
  year    = 2017
}
@article{hamilton2017representation,
  author  = {Hamilton, William L and Ying, Rex and Leskovec, Jure},
  journal = {{IEEE} Data Engineering Bulletin},
  number  = 3,
  pages   = {52--74},
  title   = {Representation learning on graphs: Methods and applications},
  volume  = 40,
  year    = 2017
}
@article{zaheer2017deep,
  author  = {Zaheer, Manzil and Kottur, Satwik and Ravanbakhsh, Siamak and P{\'o}czos, Barnab{\'a}s and Salakhutdinov, Russ R and Smola, Alexander J},
  journal = {Neural Information Processing Systems},
  pages   = {3391--3401},
  title   = {Deep sets},
  volume  = 30,
  year    = 2017
}
@article{heusel2017gans,
  author  = {Heusel, Martin and Ramsauer, Hubert and Unterthiner, Thomas and Nessler, Bernhard and Hochreiter, Sepp},
  journal = {Neural Information Processing Systems},
  pages   = {6626--6637},
  title   = {{GAN}s trained by a two time-scale update rule converge to a local {N}ash equilibrium},
  volume  = 30,
  year    = 2017
}
@inproceedings{arjovsky2017wasserstein,
  author    = {Arjovsky, Martin and Chintala, Soumith and Bottou, L{\'e}on},
  booktitle = {International Conference on Machine Learning},
  pages     = {214--223},
  title     = {Wasserstein generative adversarial networks},
  year      = 2017
}
@article{arjovsky2017towards,
  author  = {Arjovsky, Martin and Bottou, L{\'e}on},
  journal = {International Conference on Learning Representations},
  title   = {Towards principled methods for training generative adversarial networks},
  year    = 2017
}
@misc{hermann2017wasserstein,
  author       = {Herrmann, Vincent},
  howpublished = {\url{https://vincentherrmann.github.io/blog/wasserstein/}},
  title        = {Wasserstein {GAN} and the {K}antorovich-{R}ubinstein Duality},
  year         = 2017
}
@inproceedings{odena2017conditional,
  author    = {Odena, Augustus and Olah, Christopher and Shlens, Jonathon},
  booktitle = {International Conference on Machine Learning},
  pages     = {2642--2651},
  title     = {Conditional image synthesis with auxiliary classifier {GAN}s},
  year      = 2017
}
@inproceedings{ledig2017photo,
  author    = {Ledig, Christian and Theis, Lucas and Husz{\'a}r, Ferenc and Caballero, Jose and Cunningham, Andrew and Acosta, Alejandro and Aitken, Andrew and Tejani, Alykhan and Totz, Johannes and Wang, Zehan and others},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {4681--4690},
  title     = {Photo-realistic single image super-resolution using a generative adversarial network},
  year      = 2017
}
@inproceedings{zhu2017unpaired,
  author    = {Zhu, Jun-Yan and Park, Taesung and Isola, Phillip and Efros, Alexei A},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {2223--2232},
  title     = {Unpaired image-to-image translation using cycle-consistent adversarial networks},
  year      = 2017
}
@inproceedings{susmelj2017abc,
  author    = {Susmelj, Igor and Agustsson, Eirikur and Timofte, Radu},
  booktitle = {ICML Workshop on Implicit Models},
  title     = {{ABC-GAN}: Adaptive blur and control for improved training stability of generative adversarial networks},
  year      = 2017
}
@inproceedings{zhang2017zipnet,
  author    = {Zhang, Chaoyun and Ouyang, Xi and Patras, Paul},
  booktitle = {International Conference on emerging Networking EXperiments and Technologies},
  pages     = {363--375},
  title     = {Zip{N}et-{GAN}: Inferring fine-grained mobile traffic patterns via a generative adversarial neural network},
  year      = 2017
}
@article{saito2017statistical,
  author    = {Saito, Yuki and Takamichi, Shinnosuke and Saruwatari, Hiroshi},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  number    = 1,
  pages     = {84--96},
  publisher = {IEEE},
  title     = {Statistical parametric speech synthesis incorporating generative adversarial networks},
  volume    = 26,
  year      = 2017
}
@article{kaneko2017parallel,
  author  = {Kaneko, Takuhiro and Kameoka, Hirokazu},
  journal = {arXiv:1711.11293},
  title   = {Parallel-data-free voice conversion using cycle-consistent adversarial networks},
  year    = 2017
}
@article{lin2017adversarial,
  author  = {Lin, Kevin and Li, Dianqi and He, Xiaodong and Zhang, Zhengyou and Sun, Ming-Ting},
  journal = {Neural Information Processing Systems},
  pages   = {3155--3165},
  title   = {Adversarial ranking for language generation},
  volume  = 30,
  year    = 2017
}
@article{guimaraes2017objective,
  author  = {Guimaraes, Gabriel Lima and Sanchez-Lengeling, Benjamin and Outeiral, Carlos and Farias, Pedro Luis Cunha and Aspuru-Guzik, Al{\'a}n},
  journal = {arXiv:1705.10843},
  title   = {Objective-reinforced generative adversarial networks ({ORGAN}) for sequence generation models},
  year    = 2017
}
@inproceedings{yu2017seqgan,
  author    = {Yu, Lantao and Zhang, Weinan and Wang, Jun and Yu, Yong},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {2852--2858},
  title     = {Seq{GAN}: Sequence generative adversarial nets with policy gradient},
  year      = 2017
}
@article{killoran2017generating,
  author  = {Killoran, Nathan and Lee, Leo J and Delong, Andrew and Duvenaud, David and Frey, Brendan J},
  journal = {NIPS 2017 Workshop on Computational Biology},
  title   = {Generating and designing {DNA} with deep generative models},
  year    = 2017
}
@article{metz2016unrolled,
  author  = {Metz, Luke and Poole, Ben and Pfau, David and Sohl-Dickstein, Jascha},
  journal = {International Conference on Learning Representations},
  title   = {Unrolled generative adversarial networks},
  year    = 2017
}
@inproceedings{mao2017least,
  author    = {Mao, Xudong and Li, Qing and Xie, Haoran and Lau, Raymond Y. K. and Wang, Zhen and Smolley, Stephen Paul},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {2794--2802},
  title     = {Least squares generative adversarial networks},
  year      = 2017
}
@article{zhao2016energy,
  author  = {Zhao, Junbo and Mathieu, Michael and LeCun, Yann},
  journal = {International Conference on Learning Representations},
  title   = {Energy-based generative adversarial network},
  year    = 2017
}
@article{bellemare2017cramer,
  author  = {Bellemare, Marc G and Danihelka, Ivo and Dabney, Will and Mohamed, Shakir and Lakshminarayanan, Balaji and Hoyer, Stephan and Munos, R{\'e}mi},
  journal = {arXiv:1705.10743},
  title   = {The {C}ramer distance as a solution to biased {W}asserstein gradients},
  year    = 2017
}
@article{marchesi2017megapixel,
  author  = {Marchesi, Marco},
  journal = {arXiv:1706.00082},
  title   = {Megapixel size image creation using generative adversarial networks},
  year    = 2017
}
@inproceedings{arora2017generalization,
  author    = {Arora, Sanjeev and Ge, Rong and Liang, Yingyu and Ma, Tengyu and Zhang, Yi},
  booktitle = {International Conference on Machine Learning},
  pages     = {224--232},
  title     = {Generalization and equilibrium in generative adversarial nets ({GAN}s)},
  year      = 2017
}
@article{wu2016quantitative,
  author  = {Wu, Yuhuai and Burda, Yuri and Salakhutdinov, Ruslan and Grosse, Roger},
  journal = {International Conference on Learning Representations},
  title   = {On the quantitative analysis of decoder-based generative models},
  year    = 2017
}
@article{arora2017gans,
  author  = {Arora, Sanjeev and Zhang, Yi},
  journal = {arXiv:1706.08224},
  title   = {Do {GAN}s actually learn the distribution? {A}n empirical study},
  year    = 2017
}
@article{srivastava2017veegan,
  author  = {Srivastava, Akash and Valkov, Lazar and Russell, Chris and Gutmann, Michael U and Sutton, Charles},
  journal = {Neural Information Processing Systems},
  pages   = {3308--3318},
  title   = {{VEEGAN}: Reducing mode collapse in {GAN}s using implicit variational learning},
  volume  = 30,
  year    = 2017
}
@inproceedings{zhang2017stackgan,
  author    = {Zhang, Han and Xu, Tao and Li, Hongsheng and Zhang, Shaoting and Wang, Xiaogang and Huang, Xiaolei and Metaxas, Dimitris N},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {5907--5915},
  title     = {Stack{GAN}: Text to photo-realistic image synthesis with stacked generative adversarial networks},
  year      = 2017
}
@inproceedings{huang2017stacked,
  author    = {Huang, Xun and Li, Yixuan and Poursaeed, Omid and Hopcroft, John and Belongie, Serge},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {5077--5086},
  title     = {Stacked generative adversarial networks},
  year      = 2017
}
@inproceedings{luo2017learning,
  author    = {Luo, Junyu and Xu, Yong and Tang, Chenwei and Lv, Jiancheng},
  booktitle = {International Conference on Neural Information Processing},
  pages     = {207--216},
  title     = {Learning inverse mapping by autoencoder based generative adversarial nets},
  year      = 2017
}
@article{dumoulin2016adversarially,
  author  = {Dumoulin, Vincent and Belghazi, Ishmael and Poole, Ben and Mastropietro, Olivier and Lamb, Alex and Arjovsky, Martin and Courville, Aaron},
  journal = {International Conference on Learning Representations},
  title   = {Adversarially learned inference},
  year    = 2017
}
@article{lipton2017precise,
  author  = {Lipton, Zachary C and Tripathi, Subarna},
  journal = {International Conference on Learning Representations},
  title   = {Precise recovery of latent vectors from generative adversarial networks},
  year    = 2017
}
@article{zheng2017convolutional,
  author  = {Zheng, Guoqing and Yang, Yiming and Carbonell, Jaime},
  journal = {arXiv:1711.02255},
  title   = {Convolutional normalizing flows},
  year    = 2017
}
@article{papamakarios2017masked,
  author  = {Papamakarios, George and Pavlakou, Theo and Murray, Iain},
  journal = {Neural Information Processing Systems},
  pages   = {2338--2347},
  title   = {Masked autoregressive flow for density estimation},
  volume  = 30,
  year    = 2017
}
@article{hsu2017learning,
  author  = {Hsu, Wei-Ning and Zhang, Yu and Glass, James},
  journal = {INTERSPEECH},
  pages   = {1273--1277},
  title   = {Learning latent representations for speech generation and transformation},
  year    = 2017
}
@inproceedings{hu2017toward,
  author    = {Hu, Zhiting and Yang, Zichao and Liang, Xiaodan and Salakhutdinov, Ruslan and Xing, Eric P},
  booktitle = {International Conference on Machine Learning},
  pages     = {1587--1596},
  title     = {Toward controlled generation of text},
  year      = 2017
}
@inproceedings{ravanbakhsh2017enabling,
  author    = {Ravanbakhsh, Siamak and Lanusse, Fran{\c{c}}ois and Mandelbaum, Rachel and Schneider, Jeff and Poczos, Barnabas},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {1488--1494},
  title     = {Enabling dark energy science with deep generative models of galaxy images},
  year      = 2017
}
@article{van2017neural,
  author  = {van den Oord, A{\"a}ron and Vinyals, Oriol and Kavukcuoglu, Koray},
  journal = {Neural Information Processing Systems},
  pages   = {6306--6315},
  title   = {Neural discrete representation learning},
  volume  = 30,
  year    = 2017
}
@article{rolfe2016discrete,
  author  = {Rolfe, Jason Tyler},
  journal = {International Conference on Learning Representations},
  title   = {Discrete variational autoencoders},
  year    = 2017
}
@article{hsu2017voice,
  author  = {Hsu, Chin-Cheng and Hwang, Hsin-Te and Wu, Yi-Chiao and Tsao, Yu and Wang, Hsin-Min},
  journal = {INTERSPEECH},
  pages   = {3364--3368},
  title   = {Voice conversion from unaligned corpora using variational autoencoding {W}asserstein generative adversarial networks},
  year    = 2017
}
@article{zhao2017towards,
  author  = {Zhao, Shengjia and Song, Jiaming and Ermon, Stefano},
  journal = {arXiv:1702.08658},
  title   = {Towards deeper understanding of variational autoencoding models},
  year    = 2017
}
@article{chen2016variational,
  author  = {Chen, Xi and Kingma, Diederik P and Salimans, Tim and Duan, Yan and Dhariwal, Prafulla and Schulman, John and Sutskever, Ilya and Abbeel, Pieter},
  journal = {International Conference on Learning Representations},
  title   = {Variational lossy autoencoder},
  year    = 2017
}
@inproceedings{zhao2017infovae,
  author    = {Zhao, Shengjia and Song, Jiaming and Ermon, Stefano},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {5885--5892},
  title     = {Info{VAE}: {B}alancing Learning and Inference in Variational Autoencoders},
  year      = 2019
}
@inproceedings{higgins2017beta,
  author    = {Higgins, Irina and Matthey, Loic and Pal, Arka and Burgess, Christopher and Glorot, Xavier and Botvinick, Matthew and Mohamed, Shakir and Lerchner, Alexander},
  booktitle = {International Conference on Learning Representations},
  title     = {{B}eta-{VAE}: Learning basic visual concepts with a constrained variational framework},
  year      = 2017
}
@article{roeder2017sticking,
  author  = {Roeder, Geoffrey and Wu, Yuhuai and Duvenaud, David K},
  journal = {Neural Information Processing Systems},
  pages   = {6925--6934},
  title   = {Sticking the landing: {S}imple, lower-variance gradient estimators for variational inference},
  volume  = 30,
  year    = 2017
}
@article{li2017deep,
  author  = {Li, Yuxi},
  journal = {arXiv:1701.07274},
  title   = {Deep reinforcement learning: An overview},
  year    = 2017
}
@article{arulkumaran2017deep,
  author    = {Arulkumaran, Kai and Deisenroth, Marc Peter and Brundage, Miles and Bharath, Anil Anthony},
  journal   = {IEEE Signal Processing Magazine},
  number    = 6,
  pages     = {26--38},
  publisher = {IEEE},
  title     = {Deep reinforcement learning: A brief survey},
  volume    = 34,
  year      = 2017
}
@inproceedings{bellemare2017distributional,
  author    = {Bellemare, Marc G and Dabney, Will and Munos, R{\'e}mi},
  booktitle = {International Conference on Machine Learning},
  pages     = {449--458},
  title     = {A distributional perspective on reinforcement learning},
  year      = 2017
}
@article{schulman2017proximal,
  author  = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  journal = {arXiv:1707.06347},
  title   = {Proximal policy optimization algorithms},
  year    = 2017
}
@article{wang2016sample,
  author  = {Wang, Ziyu and Bapst, Victor and Heess, Nicolas and Mnih, Volodymyr and Munos, R{\'e}mi and Kavukcuoglu, Koray and de Freitas, Nando},
  journal = {International Conference on Learning Representations},
  title   = {Sample efficient actor-critic with experience replay},
  year    = 2017
}
@article{hussein2017imitation,
  address   = {New York, NY, USA},
  author    = {Hussein, Ahmed and Gaber, Mohamed Medhat and Elyan, Eyad and Jayne, Chrisina},
  journal   = {ACM Computing Surveys},
  number    = 2,
  pages     = {1--35},
  publisher = {ACM},
  title     = {Imitation learning: A survey of learning methods},
  volume    = 50,
  year      = 2017
}
@inproceedings{chollet2017xception,
  author    = {Chollet, Fran{\c{c}}ois},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {1251--1258},
  title     = {Xception: Deep learning with depthwise separable convolutions},
  year      = 2017
}
@inproceedings{nguyen2017loss,
  author    = {Nguyen, Quynh and Hein, Matthias},
  booktitle = {International Conference on Machine Learning},
  pages     = {2603--2612},
  title     = {The loss surface of deep and wide neural networks},
  year      = 2017
}
@inproceedings{pennington2017geometry,
  author    = {Pennington, Jeffrey and Bahri, Yasaman},
  booktitle = {International Conference on Machine Learning},
  pages     = {2798--2806},
  title     = {Geometry of neural network loss surfaces via random matrix theory},
  year      = 2017
}
@inproceedings{dinh2017sharp,
  author    = {Laurent Dinh and Razvan Pascanu and Samy Bengio and Yoshua Bengio},
  booktitle = {International Conference on Machine Learning},
  pages     = {1019--1028},
  title     = {Sharp Minima Can Generalize For Deep Nets},
  year      = 2017
}
@article{li2016pruning,
  author  = {Li, Hao and Kadav, Asim and Durdanovic, Igor and Samet, Hanan and Graf, Hans Peter},
  journal = {International Conference on Learning Representations},
  title   = {Pruning filters for efficient {C}onv{N}ets},
  year    = 2017
}
@inproceedings{luo2017thinet,
  author    = {Luo, Jian-Hao and Wu, Jianxin and Lin, Weiyao},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {5058--5066},
  title     = {Thi{N}et: A filter level pruning method for deep neural network compression},
  year      = 2017
}
@inproceedings{he2017channel,
  author    = {He, Yihui and Zhang, Xiangyu and Sun, Jian},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {1389--1397},
  title     = {Channel pruning for accelerating very deep neural networks},
  year      = 2017
}
@article{zagoruyko2016paying,
  author  = {Zagoruyko, Sergey and Komodakis, Nikos},
  journal = {International Conference on Learning Representations},
  title   = {Paying more attention to attention: Improving the performance of convolutional neural networks via attention transfer},
  year    = 2017
}
@article{urban2016deep,
  author  = {Urban, Gregor and Geras, Krzysztof J and Kahou, Samira Ebrahimi and Aslan, Ozlem and Wang, Shengjie and Caruana, Rich and Mohamed, Abdelrahman and Philipose, Matthai and Richardson, Matt},
  journal = {International Conference on Learning Representations},
  title   = {Do deep convolutional nets really need to be deep and convolutional?},
  year    = 2017
}
@inproceedings{Danks-London-2017,
  author    = {David Danks and Alex John London},
  booktitle = {International Joint Conference on Artificial Intelligence},
  pages     = {4691--4697},
  title     = {Algorithmic Bias in Autonomous Systems},
  year      = 2017
}
@inproceedings{Kleinberg-et-al-2016,
  author    = {Jon Kleinberg and Sendhil Mullainathan and Manish Raghavan},
  booktitle = {Innovations in Theoretical Computer Science Conference},
  pages     = {1--23},
  title     = {Inherent Trade-Offs in the Fair Determination of Risk Scores},
  volume    = 67,
  year      = 2017
}
@article{Chouldechova-2016,
  author  = {Alexandra Chouldechova},
  journal = {Big Data},
  number  = 2,
  pages   = {153--163},
  title   = {Fair prediction with disparate impact: A study of bias in recidivism prediction instruments},
  volume  = 5,
  year    = 2017
}
@article{Berk-et-al-2017,
  author  = {Richard Berk and Hoda Heidari and Shahin Jabbari and Michael Kearns and Aaron Roth},
  journal = {Sociological Methods \& Research},
  number  = 1,
  pages   = {3--44},
  title   = {Fairness in Criminal Justice Risk Assessments: the State of the Art},
  volume  = 50,
  year    = 2017
}
@article{Wachter-et-al-2017,
  author  = {Sandra Wachter and Brent Mittelstadt and Luciano Floridi},
  journal = {International Data Privacy Law},
  number  = 2,
  pages   = {76--99},
  title   = {Why a right to explanation of automated decision-making does not exist in the {General Data Protection Regulation}},
  volume  = 7,
  year    = 2017
}
@article{Heyns-2017,
  author  = {Christof Heyns},
  journal = {South African Journal of Human Rights},
  number  = 1,
  pages   = {46--71},
  title   = {Autonomous weapons in armed conflict and the right to a dignified life: {A}n {A}frican perspective},
  volume  = 33,
  year    = 2017
}
@book{Manyika-et-al-2017,
  author    = {James Manyika and Susan Lund and Michael Chui and Jacques Bughin and Jonathan Woetzel and Parul Batra and Ryan Ko and Saurabh Sanghvi},
  publisher = {McKinsey Global Institute},
  title     = {Jobs Lost, Jobs Gained: Workforce Transitions in a Time of Automation},
  year      = 2017
}
@article{Frey-Osborne-2017,
  author  = {Carl Benedikt Frey and Michael A. Osborne},
  journal = {Technological Forecasting and Social Change},
  pages   = {254--280},
  title   = {The future of employment: How susceptible are jobs to computerisation?},
  volume  = 114,
  year    = 2017
}
@misc{Ahmed-2017,
  author       = {Tufayel Ahmed},
  howpublished = {Newsweek, 8 Sept 2017. \url{https://www.newsweek.com/ai-can-tell-if-youre-gay-artificial-intelligence-predicts-sexuality-one-photo-661643}},
  title        = {{AI} Can Tell If You're Gay: Artificial Intelligence Predicts Sexuality From One Photo with Startling Accuracy},
  year         = 2017
}
@misc{Matsakis-2017,
  author       = {Louise Matsakis},
  howpublished = {Vice, Sept 8, 2017. \url{https://www.vice.com/en/article/a33xb4/a-frightening-ai-can-determine-a-persons-sexuality-with-91-accuracy}},
  title        = {A Frightening {AI} Can Determine Whether a Person Is Gay With 91 Percent Accuracy},
  year         = 2017
}
@misc{Fernandez-2017,
  author       = {Colin Fernandez},
  howpublished = {Daily Mail, 7 Sept, 2017. \url{https://www.dailymail.co.uk/sciencetech/article-4862676/Artificial-intelligence-tell-gay.html}},
  title        = {Can a computer tell if you're {GAY}? {A}rtificial intelligence system guesses your sexuality with 91\% accuracy just by looking at a photo of your face},
  year         = 2017
}
@incollection{Reiss-Sprenger-2017,
  author    = {Reiss, Julian and Sprenger, Jan},
  booktitle = {The {Stanford} Encyclopedia of Philosophy},
  editor    = {Zalta, Edward N.},
  publisher = {Metaphysics Research Lab, Stanford University},
  title     = {Scientific Objectivity},
  year      = 2017
}
@article{binns2018algorithmic,
  author    = {Binns, Reuben},
  journal   = {Philosophy \& Technology},
  number    = 4,
  pages     = {543--556},
  publisher = {Springer},
  title     = {Algorithmic accountability and public reason},
  volume    = 31,
  year      = 2018
}
@book{tegmark2018life,
  author    = {Tegmark, Max},
  publisher = {Vintage},
  title     = {Life 3.0: Being human in the age of artificial intelligence},
  year      = 2018
}
@book{sutton2018reinforcement,
  author    = {Sutton, Richard S and Barto, Andrew G},
  edition   = {Second},
  publisher = {MIT Press},
  title     = {Reinforcement learning: An introduction},
  year      = 2018
}
@book{sejnowski2018deep,
  author    = {Sejnowski, Terrence J},
  publisher = {MIT Press},
  title     = {The deep learning revolution},
  year      = 2018
}
@article{elfwing2018sigmoid,
  author    = {Elfwing, Stefan and Uchibe, Eiji and Doya, Kenji},
  journal   = {Neural Networks},
  pages     = {3--11},
  publisher = {Elsevier},
  title     = {Sigmoid-weighted linear units for neural network function approximation in reinforcement learning},
  volume    = 107,
  year      = 2018
}
@inproceedings{serra2018bounding,
  author    = {Serra, Thiago and Tjandraatmadja, Christian and Ramalingam, Srikumar},
  booktitle = {International Conference on Machine Learning},
  pages     = {4558--4566},
  title     = {Bounding and counting linear regions of deep neural networks},
  year      = 2018
}
@inproceedings{dorta2018structured,
  author    = {Dorta, Garoe and Vicente, Sara and Agapito, Lourdes and Campbell, Neill D. F. and Simpson, Ivor},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {5477--5485},
  title     = {Structured uncertainty prediction networks},
  year      = 2018
}
@inproceedings{prokudin2018deep,
  author    = {Prokudin, Sergey and Gehler, Peter and Nowozin, Sebastian},
  booktitle = {European Conference on Computer Vision},
  pages     = {534--551},
  title     = {Deep directional statistics: Pose estimation with uncertainty quantification},
  year      = 2018
}
@article{bottou2018optimization,
  author    = {Bottou, L{\'e}on and Curtis, Frank E and Nocedal, Jorge},
  journal   = {SIAM Review},
  number    = 2,
  pages     = {223--311},
  publisher = {SIAM},
  title     = {Optimization methods for large-scale machine learning},
  volume    = 60,
  year      = 2018
}
@article{jastrzębski2018factors,
  author  = {Jastrz{\k{e}}bski, Stanis{\l}aw and Kenton, Zachary and Arpit, Devansh and Ballas, Nicolas and Fischer, Asja and Bengio, Yoshua and Storkey, Amos},
  journal = {arXiv:1711.04623},
  title   = {Three factors influencing minima in {SGD}},
  year    = 2018
}
@inproceedings{smith2018dont,
  author    = {Samuel L. Smith and Pieter{-}Jan Kindermans and Chris Ying and Quoc V. Le},
  booktitle = {International Conference on Learning Representations},
  title     = {Don't Decay the Learning Rate, Increase the Batch Size},
  year      = 2018
}
@article{goyal2018accurate,
  author  = {Goyal, Priya and Doll{\'a}r, Piotr and Girshick, Ross and Noordhuis, Pieter and Wesolowski, Lukasz and Kyrola, Aapo and Tulloch, Andrew and Jia, Yangqing and He, Kaiming},
  journal = {arXiv:1706.02677},
  title   = {Accurate, Large Minibatch {SGD}: {T}raining {I}mage{N}et in 1 Hour},
  year    = 2018
}
@inproceedings{reddi2018on,
  author    = {Sashank J. Reddi and Satyen Kale and Sanjiv Kumar},
  booktitle = {International Conference on Learning Representations},
  title     = {On the Convergence of {A}dam and Beyond},
  year      = 2018
}
@article{zaheer2018adaptive,
  author  = {Zaheer, Manzil and Reddi, Sashank and Sachan, Devendra and Kale, Satyen and Kumar, Sanjiv},
  journal = {Neural Information Processing Systems},
  pages   = {9815--9825},
  title   = {Adaptive methods for nonconvex optimization},
  volume  = 31,
  year    = 2018
}
@article{baydin2018automatic,
  author    = {Baydin, Atilim Gunes and Pearlmutter, Barak A and Radul, Alexey Andreyevich and Siskind, Jeffrey Mark},
  journal   = {Journal of Machine Learning Research},
  pages     = {1--43},
  publisher = {Microtome Publishing},
  title     = {Automatic differentiation in machine learning: A survey},
  volume    = 18,
  year      = 2018
}
@inproceedings{xiao2018dynamical,
  author    = {Xiao, Lechao and Bahri, Yasaman and Sohl-Dickstein, Jascha and Schoenholz, Samuel and Pennington, Jeffrey},
  booktitle = {International Conference on Machine Learning},
  pages     = {5393--5402},
  title     = {Dynamical isometry and a mean field theory of {CNN}s: How to train 10,000-layer vanilla convolutional neural networks},
  year      = 2018
}
@inproceedings{falkner2018bohb,
  author    = {Falkner, Stefan and Klein, Aaron and Hutter, Frank},
  booktitle = {International Conference on Machine Learning},
  pages     = {1437--1446},
  title     = {{BOHB}: {R}obust and efficient hyperparameter optimization at scale},
  year      = 2018
}
@article{louizos2018learning,
  author  = {Christos Louizos and Max Welling and Diederik P. Kingma},
  journal = {International Conference on Learning Representations},
  title   = {Learning Sparse Neural Networks through {$L_0$} Regularization},
  year    = 2018
}
@article{neyshabur2017pac,
  author  = {Neyshabur, Behnam and Bhojanapalli, Srinadh and Srebro, Nathan},
  journal = {International Conference on Learning Representations},
  title   = {A {PAC}-{B}ayesian approach to spectrally-normalized margin bounds for neural networks},
  year    = 2018
}
@article{izmailov2018averaging,
  author  = {Izmailov, Pavel and Podoprikhin, Dmitrii and Garipov, Timur and Vetrov, Dmitry and Wilson, Andrew Gordon},
  journal = {Uncertainty in Artificial Intelligence},
  pages   = {876--885},
  title   = {Averaging weights leads to wider optima and better generalization},
  year    = 2018
}
@inproceedings{garipov2018loss,
  author    = {Garipov, Timur and Izmailov, Pavel and Podoprikhin, Dmitrii and Vetrov, Dmitry and Wilson, Andrew Gordon},
  booktitle = {Neural Information Processing Systems},
  pages     = {8803--8812},
  title     = {Loss Surfaces, Mode Connectivity, and Fast Ensembling of {DNN}s},
  volume    = 31,
  year      = 2018
}
@inproceedings{gidaris2018unsupervised,
  author    = {Spyros Gidaris and Praveer Singh and Nikos Komodakis},
  booktitle = {International Conference on Learning Representations},
  title     = {Unsupervised Representation Learning by Predicting Image Rotations},
  year      = 2018
}
@article{inoue2018data,
  author  = {Inoue, Hiroshi},
  journal = {arXiv:1801.02929},
  title   = {Data augmentation by pairing samples for images classification},
  year    = 2018
}
@inproceedings{lasseck2018acoustic,
  author    = {Lasseck, Mario},
  booktitle = {Detection and Classification of Acoustic Scenes and Events},
  pages     = {143--147},
  title     = {Acoustic bird detection with deep convolutional neural networks},
  year      = 2018
}
@article{belinkov2017synthetic,
  author  = {Belinkov, Yonatan and Bisk, Yonatan},
  journal = {International Conference on Learning Representations},
  title   = {Synthetic and natural noise both break neural machine translation},
  year    = 2018
}
@inproceedings{ebrahimi2017hotflip,
  author    = {Ebrahimi, Javid and Rao, Anyi and Lowd, Daniel and Dou, Dejing},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {31--36},
  title     = {{H}ot{F}lip: {W}hite-Box Adversarial Examples for Text Classification},
  year      = 2018
}
@article{coulombe2018text,
  author  = {Coulombe, Claude},
  journal = {arXiv:1812.04718},
  title   = {Text data augmentation made simple by leveraging {NLP} cloud {API}s},
  year    = 2018
}
@article{chen2014semantic,
  author  = {Chen, Liang-Chieh and Papandreou, George and Kokkinos, Iasonas and Murphy, Kevin and Yuille, Alan L},
  journal = {IEEE Transactions on Pattern Analysis \& Machine Intelligence},
  number  = 4,
  pages   = {834--848},
  title   = {Deep{L}ab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected {CRF}s},
  volume  = 40,
  year    = 2018
}
@inproceedings{tran2018closer,
  author    = {Tran, Du and Wang, Heng and Torresani, Lorenzo and Ray, Jamie and LeCun, Yann and Paluri, Manohar},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {6450--6459},
  title     = {A closer look at spatiotemporal convolutions for action recognition},
  year      = 2018
}
@inproceedings{liu2018image,
  author    = {Liu, Guilin and Reda, Fitsum A and Shih, Kevin J and Wang, Ting-Chun and Tao, Andrew and Catanzaro, Bryan},
  booktitle = {European Conference on Computer Vision},
  pages     = {85--100},
  title     = {Image inpainting for irregular holes using partial convolutions},
  year      = 2018
}
@inproceedings{hu2018squeeze,
  author    = {Hu, Jie and Shen, Li and Sun, Gang},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {7132--7141},
  title     = {Squeeze-and-excitation networks},
  year      = 2018
}
@inproceedings{esteves2017polar,
  author    = {Carlos Esteves and Christine Allen{-}Blanchette and Xiaowei Zhou and Kostas Daniilidis},
  booktitle = {International Conference on Learning Representations},
  title     = {Polar Transformer Networks},
  year      = 2018
}
@article{qin2018convolutional,
  author  = {Qin, Zhuwei and Yu, Fuxun and Liu, Chenchen and Chen, Xiang},
  journal = {arXiv:1804.11191},
  title   = {How convolutional neural network see the world --- {A} survey of convolutional neural network visualization methods},
  year    = 2018
}
@article{li2018visualizing,
  author  = {Li, Hao and Xu, Zheng and Taylor, Gavin and Studer, Christoph and Goldstein, Tom},
  journal = {Neural Information Processing Systems},
  pages   = {6391--6401},
  title   = {Visualizing the loss landscape of neural nets},
  volume  = 31,
  year    = 2018
}
@inproceedings{wu2018blockdrop,
  author    = {Wu, Zuxuan and Nagarajan, Tushar and Kumar, Abhishek and Rennie, Steven and Davis, Larry S and Grauman, Kristen and Feris, Rogerio},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {8817--8826},
  title     = {Block{D}rop: Dynamic inference paths in residual networks},
  year      = 2018
}
@inproceedings{wu2018group,
  author    = {Wu, Yuxin and He, Kaiming},
  booktitle = {European Conference on Computer Vision},
  pages     = {3--19},
  title     = {Group normalization},
  year      = 2018
}
@inproceedings{teye2018bayesian,
  author    = {Teye, Mattias and Azizpour, Hossein and Smith, Kevin},
  booktitle = {International Conference on Machine Learning},
  pages     = {4907--4916},
  title     = {Bayesian uncertainty estimation for batch normalized deep networks},
  year      = 2018
}
@article{santurkar2018does,
  author  = {Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew and Madry, Aleksander},
  journal = {Neural Information Processing Systems},
  pages   = {2488--2498},
  title   = {How does batch normalization help optimization?},
  volume  = 31,
  year    = 2018
}
@article{bjorck2018understanding,
  author  = {Bjorck, Nils and Gomes, Carla P and Selman, Bart and Weinberger, Kilian Q},
  journal = {Neural Information Processing Systems},
  pages   = {7705--7716},
  title   = {Understanding batch normalization},
  volume  = 31,
  year    = 2018
}
@article{arora2018theoretical,
  author  = {Arora, Sanjeev and Li, Zhiyuan and Lyu, Kaifeng},
  journal = {arXiv:1812.03981},
  title   = {Theoretical analysis of auto rate-tuning by batch normalization},
  year    = 2018
}
@article{luo2018towards,
  author  = {Luo, Ping and Wang, Xinjiang and Shao, Wenqi and Peng, Zhanglin},
  journal = {International Conference on Learning Representations},
  title   = {Towards understanding regularization in batch normalization},
  year    = 2018
}
@incollection{zhou2018unet++,
  author    = {Zhou, Zongwei and Rahman Siddiquee, Md Mahfuzur and Tajbakhsh, Nima and Liang, Jianming},
  booktitle = {Deep Learning in Medical Image Analysis Workshop},
  pages     = {3--11},
  title     = {U{N}et++: A nested {U}-{N}et architecture for medical image segmentation},
  year      = 2018
}
@article{iglovikov2018ternausnet,
  author  = {Iglovikov, Vladimir and Shvets, Alexey},
  journal = {arXiv:1801.05746},
  title   = {Ternaus{N}et: {U}-{N}et with {VGG}11 encoder pre-trained on {I}mage{N}et for image segmentation},
  year    = 2018
}
@article{yao2018pixel,
  author    = {Yao, Wei and Zeng, Zhigang and Lian, Cheng and Tang, Huiming},
  journal   = {Neurocomputing},
  pages     = {364--371},
  publisher = {Elsevier},
  title     = {Pixel-wise regression using {U}-{N}et and its application on pansharpening},
  volume    = 312,
  year      = 2018
}
@inproceedings{kudo2018subword,
  author    = {Kudo, Taku},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {66--75},
  title     = {Subword regularization: Improving neural network translation models with multiple subword candidates},
  year      = 2018
}
@article{kudo2018sentencepiece,
  author  = {Kudo, Taku and Richardson, John},
  journal = {Empirical Methods in Natural Language Processing},
  pages   = {66--71},
  title   = {Sentence{P}iece: A simple and language independent subword tokenizer and detokenizer for neural text processing},
  year    = 2018
}
@article{kulikov2018importance,
  author  = {Kulikov, Ilia and Miller, Alexander H and Cho, Kyunghyun and Weston, Jason},
  journal = {ACL International Conference on Natural Language Generation},
  pages   = {76--87},
  title   = {Importance of search and evaluation strategies in neural dialogue modeling},
  year    = 2018
}
@inproceedings{fan2018hierarchical,
  author    = {Fan, Angela and Lewis, Mike and Dauphin, Yann N.},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {889--898},
  title     = {Hierarchical Neural Story Generation},
  year      = 2018
}
@article{shaw2018self,
  author  = {Shaw, Peter and Uszkoreit, Jakob and Vaswani, Ashish},
  journal = {ACL Human Language Technologies},
  pages   = {464--468},
  title   = {Self-attention with relative position representations},
  year    = 2018
}
@article{liu2018generating,
  author  = {Liu, Peter J and Saleh, Mohammad and Pot, Etienne and Goodrich, Ben and Sepassi, Ryan and Kaiser, Lukasz and Shazeer, Noam},
  journal = {International Conference on Learning Representations},
  title   = {Generating {W}ikipedia by summarizing long sequences},
  year    = 2018
}
@inproceedings{hu2018relation,
  author    = {Hu, Han and Gu, Jiayuan and Zhang, Zheng and Dai, Jifeng and Wei, Yichen},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {3588--3597},
  title     = {Relation networks for object detection},
  year      = 2018
}
@inproceedings{wang2018non,
  author    = {Wang, Xiaolong and Girshick, Ross and Gupta, Abhinav and He, Kaiming},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {7794--7803},
  title     = {Non-local neural networks},
  year      = 2018
}
@inproceedings{parmar2018image,
  author    = {Parmar, Niki and Vaswani, Ashish and Uszkoreit, Jakob and Kaiser, Lukasz and Shazeer, Noam and Ku, Alexander and Tran, Dustin},
  booktitle = {International Conference on Machine Learning},
  pages     = {4055--4064},
  title     = {Image transformer},
  year      = 2018
}
@inproceedings{zhang2018end,
  author    = {Zhang, Muhan and Cui, Zhicheng and Neumann, Marion and Chen, Yixin},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {4438--4445},
  title     = {An end-to-end deep learning architecture for graph classification},
  year      = 2018
}
@article{zhang2018link,
  author  = {Zhang, Muhan and Chen, Yixin},
  journal = {Neural Information Processing Systems},
  pages   = {5171--5181},
  title   = {Link prediction based on graph neural networks},
  volume  = 31,
  year    = 2018
}
@article{lee2018higher,
  author  = {Lee, John Boaz and Rossi, Ryan A and Kong, Xiangnan and Kim, Sungchul and Koh, Eunyee and Rao, Anup},
  journal = {arXiv:1809.07697},
  title   = {Higher-order graph convolutional networks},
  year    = 2018
}
@inproceedings{xu2018representation,
  author    = {Xu, Keyulu and Li, Chengtao and Tian, Yonglong and Sonobe, Tomohiro and Kawarabayashi, Ken-ichi and Jegelka, Stefanie},
  booktitle = {International Conference on Machine Learning},
  pages     = {5453--5462},
  title     = {Representation learning on graphs with jumping knowledge networks},
  year      = 2018
}
@article{zhang2018gaan,
  author  = {Zhang, Jiani and Shi, Xingjian and Xie, Junyuan and Ma, Hao and King, Irwin and Yeung, Dit-Yan},
  journal = {Uncertainty in Artificial Intelligence},
  pages   = {339--349},
  title   = {Ga{AN}: Gated attention networks for learning on large and spatiotemporal graphs},
  year    = 2018
}
@article{murphy2018janossy,
  author  = {Murphy, Ryan L and Srinivasan, Balasubramaniam and Rao, Vinayak and Ribeiro, Bruno},
  journal = {International Conference on Learning Representations},
  title   = {Janossy pooling: Learning deep permutation-invariant functions for variable-size inputs},
  year    = 2018
}
@article{monti2018dual,
  author  = {Monti, Federico and Shchur, Oleksandr and Bojchevski, Aleksandar and Litany, Or and G{\"u}nnemann, Stephan and Bronstein, Michael M},
  journal = {arXiv:1806.00770},
  title   = {Dual-primal graph convolutional networks},
  year    = 2018
}
@inproceedings{chen2017stochastic,
  author    = {Chen, Jianfei and Zhu, Jun and Song, Le},
  booktitle = {International Conference on Machine Learning},
  pages     = {941--949},
  title     = {Stochastic training of graph convolutional networks with variance reduction},
  year      = 2018
}
@inproceedings{ying2018graph,
  author    = {Ying, Rex and He, Ruining and Chen, Kaifeng and Eksombatchai, Pong and Hamilton, William L and Leskovec, Jure},
  booktitle = {ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
  pages     = {974--983},
  title     = {Graph convolutional neural networks for web-scale recommender systems},
  year      = 2018
}
@article{chen2018fastgcn,
  author  = {Chen, Jie and Ma, Tengfei and Xiao, Cao},
  journal = {International Conference on Learning Representations},
  title   = {Fast{GCN}: Fast learning with graph convolutional networks via importance sampling},
  year    = 2018
}
@article{huang2018adaptive,
  author  = {Huang, Wenbing and Zhang, Tong and Rong, Yu and Huang, Junzhou},
  journal = {Neural Information Processing Systems},
  pages   = {4563--4572},
  title   = {Adaptive sampling towards fast graph representation learning},
  volume  = 31,
  year    = 2018
}
@inproceedings{schlichtkrull2018modeling,
  author    = {Schlichtkrull, Michael and Kipf, Thomas N and Bloem, Peter and van den Berg, Rianne and Titov, Ivan and Welling, Max},
  booktitle = {European Semantic Web Conference},
  pages     = {593--607},
  title     = {Modeling relational data with graph convolutional networks},
  year      = 2018
}
@article{ying2018hierarchical,
  author  = {Ying, Zhitao and You, Jiaxuan and Morris, Christopher and Ren, Xiang and Hamilton, Will and Leskovec, Jure},
  journal = {Neural Information Processing Systems},
  pages   = {4805--4815},
  title   = {Hierarchical graph representation learning with differentiable pooling},
  volume  = 31,
  year    = 2018
}
@inproceedings{li2018deeper,
  author    = {Li, Qimai and Han, Zhichao and Wu, Xiao-Ming},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {3538--3545},
  title     = {Deeper insights into graph convolutional networks for semi-supervised learning},
  year      = 2018
}
@article{barratt2018note,
  author  = {Barratt, Shane and Sharma, Rishi},
  journal = {Workshop on Theoretical Foundations and Applications of Deep Generative Models},
  title   = {A note on the inception score},
  year    = 2018
}
@article{karras2017progressive,
  author  = {Karras, Tero and Aila, Timo and Laine, Samuli and Lehtinen, Jaakko},
  journal = {International Conference on Learning Representations},
  title   = {Progressive growing of {GAN}s for improved quality, stability, and variation},
  year    = 2018
}
@article{creswell2018generative,
  author    = {Creswell, Antonia and White, Tom and Dumoulin, Vincent and Arulkumaran, Kai and Sengupta, Biswa and Bharath, Anil A},
  journal   = {IEEE Signal Processing Magazine},
  number    = 1,
  pages     = {53--65},
  publisher = {IEEE},
  title     = {Generative adversarial networks: An overview},
  volume    = 35,
  year      = 2018
}
@article{de2018molgan,
  author  = {De Cao, Nicola and Kipf, Thomas},
  journal = {ICML Workshop on Theoretical Foundations and Applications of Deep Generative Models},
  title   = {Mol{GAN}: An implicit generative model for small molecular graphs},
  year    = 2018
}
@article{donahue2018adversarial,
  author  = {Donahue, Chris and McAuley, Julian and Puckette, Miller},
  journal = {International Conference on Learning Representations},
  title   = {Adversarial audio synthesis},
  year    = 2018
}
@inproceedings{fang2018high,
  author    = {Fang, Fuming and Yamagishi, Junichi and Echizen, Isao and Lorenzo-Trueba, Jaime},
  booktitle = {International Conference on Acoustics, Speech and Signal Processing},
  pages     = {5279--5283},
  title     = {High-quality nonparallel voice conversion based on cycle-consistent adversarial network},
  year      = 2018
}
@article{hartmann2018eeg,
  author  = {Hartmann, Kay Gregor and Schirrmeister, Robin Tibor and Ball, Tonio},
  journal = {arXiv:1806.01875},
  title   = {{EEG-GAN}: Generative adversarial networks for electroencephalograhic ({EEG}) brain signals},
  year    = 2018
}
@article{fedus2018maskgan,
  author  = {Fedus, William and Goodfellow, Ian and Dai, Andrew M},
  journal = {International Conference on Learning Representations},
  title   = {Mask{GAN}: {B}etter text generation via filling in the\_},
  year    = 2018
}
@inproceedings{wang2018video,
  author    = {Wang, Ting{-}Chun and Liu, Ming{-}Yu and Zhu, Jun{-}Yan and Yakovenko, Nikolai and Tao, Andrew and Kautz, Jan and Catanzaro, Bryan},
  booktitle = {Neural Information Processing Systems},
  pages     = {1152--1164},
  title     = {Video-to-Video Synthesis},
  volume    = 31,
  year      = 2018
}
@article{miyato2018spectral,
  author  = {Miyato, Takeru and Kataoka, Toshiki and Koyama, Masanori and Yoshida, Yuichi},
  journal = {International Conference on Learning Representations},
  title   = {Spectral normalization for generative adversarial networks},
  year    = 2018
}
@inproceedings{wu2018wasserstein,
  author    = {Wu, Jiqing and Huang, Zhiwu and Thoma, Janine and Acharya, Dinesh and Van Gool, Luc},
  booktitle = {European Conference on Computer Vision},
  pages     = {653--668},
  title     = {Wasserstein divergence for {GAN}s},
  year      = 2018
}
@article{adler2018banach,
  author  = {Adler, Jonas and Lunz, Sebastian},
  journal = {Neural Information Processing Systems},
  pages   = {6755--6764},
  title   = {Banach {W}asserstein {GAN}},
  volume  = 31,
  year    = 2018
}
@article{lucic2018gans,
  author  = {Lucic, Mario and Kurach, Karol and Michalski, Marcin and Gelly, Sylvain and Bousquet, Olivier},
  journal = {Neural Information Processing Systems},
  pages   = {698--707},
  title   = {Are {GAN}s created equal? {A} large-scale study},
  volume  = 31,
  year    = 2018
}
@article{pieters2018comparing,
  author  = {Pieters, Mathijs and Wiering, Marco},
  journal = {arXiv:1803.09093},
  title   = {Comparing generative adversarial network techniques for image creation and modification},
  year    = 2018
}
@article{lin2018pacgan,
  author  = {Lin, Zinan and Khetan, Ashish and Fanti, Giulia and Oh, Sewoong},
  journal = {Neural Information Processing Systems},
  pages   = {1505--1514},
  title   = {Pac{GAN}: The power of two samples in generative adversarial networks},
  volume  = 31,
  year    = 2018
}
@inproceedings{ghosh2018multi,
  author    = {Ghosh, Arnab and Kulharia, Viveka and Namboodiri, Vinay P and Torr, Philip H S and Dokania, Puneet K},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {8513--8521},
  title     = {Multi-agent diverse generative adversarial networks},
  year      = 2018
}
@inproceedings{wang2018high,
  author    = {Wang, Ting-Chun and Liu, Ming-Yu and Zhu, Jun-Yan and Tao, Andrew and Kautz, Jan and Catanzaro, Bryan},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {8798--8807},
  title     = {High-resolution image synthesis and semantic manipulation with conditional {GAN}s},
  year      = 2018
}
@article{miyato2018cgans,
  author  = {Miyato, Takeru and Koyama, Masanori},
  journal = {International Conference on Learning Representations},
  title   = {c{GAN}s with projection discriminator},
  year    = 2018
}
@article{donahue2018semantically,
  author  = {Donahue, Chris and Lipton, Zachary C. and Balsubramani, Akshay and McAuley, Julian},
  journal = {International Conference on Learning Representations},
  title   = {Semantically Decomposing the Latent Spaces of Generative Adversarial Networks},
  year    = 2018
}
@article{xiao2018dnagan,
  author  = {Xiao, Taihong and Hong, Jiapeng and Ma, Jinwen},
  journal = {International Conference on Learning Representations},
  title   = {{DNA-GAN}: Learning Disentangled Representations from Multi-Attribute Images},
  year    = 2018
}
@inproceedings{choi2018stargan,
  author    = {Choi, Yunjey and Choi, Minje and Kim, Munyoung and Ha, Jung-Woo and Kim, Sunghun and Choo, Jaegul},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {8789--8797},
  title     = {Star{GAN}: Unified generative adversarial networks for multi-domain image-to-image translation},
  year      = 2018
}
@article{creswell2018inverting,
  author    = {Creswell, Antonia and Bharath, Anil Anthony},
  journal   = {IEEE Transactions on Neural Networks and Learning Systems},
  number    = 7,
  pages     = {1967--1974},
  publisher = {IEEE},
  title     = {Inverting the generator of a generative adversarial network},
  volume    = 30,
  year      = 2019
}
@article{kingma2018glow,
  author  = {Kingma, Durk P and Dhariwal, Prafulla},
  journal = {Neural Information Processing Systems},
  pages   = {10236--10245},
  title   = {Glow: Generative flow with invertible 1x1 convolutions},
  volume  = 31,
  year    = 2018
}
@article{jacobsen2018revnet,
  author  = {Jacobsen, J{\"o}rn-Henrik and Smeulders, Arnold and Oyallon, Edouard},
  journal = {International Conference on Learning Representations},
  title   = {i-{R}ev{N}et: Deep invertible networks},
  year    = 2018
}
@article{mackay2018reversible,
  author  = {MacKay, Matthew and Vicol, Paul and Ba, Jimmy and Grosse, Roger B},
  journal = {Neural Information Processing Systems},
  pages   = {9043--9054},
  title   = {Reversible recurrent neural networks},
  volume  = 31,
  year    = 2018
}
@inproceedings{kim2018flowavenet,
  author    = {Kim, Sungwon and Lee, Sang-gil and Song, Jongyoon and Kim, Jaehyeon and Yoon, Sungroh},
  booktitle = {International Conference on Machine Learning},
  pages     = {3370--3378},
  title     = {Flo{W}ave{N}et: A generative flow for raw audio},
  year      = 2019
}
@article{he2018unsupervised,
  author  = {He, Junxian and Neubig, Graham and Berg-Kirkpatrick, Taylor},
  journal = {ACL Empirical Methods in Natural Language Processing},
  pages   = {1292--1302},
  title   = {Unsupervised learning of syntactic structure with invertible neural projections},
  year    = 2018
}
@inproceedings{haarnoja2018latent,
  author    = {Haarnoja, Tuomas and Hartikainen, Kristian and Abbeel, Pieter and Levine, Sergey},
  booktitle = {International Conference on Machine Learning},
  pages     = {1851--1860},
  title     = {Latent space policies for hierarchical reinforcement learning},
  year      = 2018
}
@inproceedings{huang2018neural,
  author    = {Huang, Chin-Wei and Krueger, David and Lacoste, Alexandre and Courville, Aaron},
  booktitle = {International Conference on Machine Learning},
  pages     = {2078--2087},
  title     = {Neural autoregressive flows},
  year      = 2018
}
@inproceedings{oord2018parallel,
  author    = {van den Oord, Aaron and Li, Yazhe and Babuschkin, Igor and Simonyan, Karen and Vinyals, Oriol and Kavukcuoglu, Koray and van den Driessche, George and Lockhart, Edward and Cobo, Luis and Stimberg, Florian and others},
  booktitle = {International Conference on Machine Learning},
  pages     = {3918--3926},
  title     = {Parallel {W}ave{N}et: Fast high-fidelity speech synthesis},
  year      = 2018
}
@inproceedings{chang2018reversible,
  author    = {Chang, Bo and Meng, Lili and Haber, Eldad and Ruthotto, Lars and Begert, David and Holtham, Elliot},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {2811--2818},
  title     = {Reversible architectures for arbitrarily deep residual neural networks},
  year      = 2018
}
@article{chen2018neural,
  author  = {Chen, Ricky T Q and Rubanova, Yulia and Bettencourt, Jesse and Duvenaud, David K},
  journal = {Neural Information Processing Systems},
  pages   = {6572--6583},
  title   = {Neural ordinary differential equations},
  volume  = 31,
  year    = 2018
}
@article{chen2018isolating,
  author  = {Chen, Ricky T Q and Li, Xuechen and Grosse, Roger B and Duvenaud, David K},
  journal = {Neural Information Processing Systems},
  pages   = {2615--2625},
  title   = {Isolating sources of disentanglement in variational autoencoders},
  volume  = 31,
  year    = 2018
}
@inproceedings{akuzawa2018expressive,
  author    = {Akuzawa, Kei and Iwasawa, Yusuke and Matsuo, Yutaka},
  booktitle = {INTERSPEECH},
  pages     = {3067--3071},
  title     = {Expressive speech synthesis via modeling expressions with variational autoencoder},
  year      = 2018
}
@article{gomez2018automatic,
  author    = {G{\'o}mez-Bombarelli, Rafael and Wei, Jennifer N and Duvenaud, David and Hern{\'a}ndez-Lobato, Jos{\'e} Miguel and S{\'a}nchez-Lengeling, Benjam{\'\i}n and Sheberla, Dennis and Aguilera-Iparraguirre, Jorge and Hirzel, Timothy D and Adams, Ryan P and Aspuru-Guzik, Al{\'a}n},
  journal   = {ACS Central Science},
  number    = 2,
  pages     = {268--276},
  publisher = {ACS Publications},
  title     = {Automatic chemical design using a data-driven continuous representation of molecules},
  volume    = 4,
  year      = 2018
}
@article{sultan2018transferable,
  author    = {Sultan, Mohammad M and Wayment-Steele, Hannah K and Pande, Vijay S},
  journal   = {Journal of Chemical Theory and Computation},
  number    = 4,
  pages     = {1887--1894},
  publisher = {ACS Publications},
  title     = {Transferable neural networks for enhanced sampling of protein dynamics},
  volume    = 14,
  year      = 2018
}
@inproceedings{simonovsky2018graphvae,
  author    = {Simonovsky, Martin and Komodakis, Nikos},
  booktitle = {International Conference on Artificial Neural Networks},
  pages     = {412--422},
  title     = {Graph{VAE}: Towards generation of small graphs using variational autoencoders},
  year      = 2018
}
@article{hernandez2018variational,
  author    = {Hern{\'a}ndez, Carlos X and Wayment-Steele, Hannah K and Sultan, Mohammad M and Husic, Brooke E and Pande, Vijay S},
  journal   = {Physical Review E},
  number    = 6,
  pages     = {062412},
  publisher = {APS},
  title     = {Variational encoding of complex dynamics},
  volume    = 97,
  year      = 2018
}
@inproceedings{inoue2018transfer,
  author    = {Inoue, Tadanobu and Choudhury, Subhajit and De Magistris, Giovanni and Dasgupta, Sakyasingha},
  booktitle = {IEEE International Conference on Image Processing},
  pages     = {2725--2729},
  title     = {Transfer learning from synthetic to real images using variational autoencoders for precise position detection},
  year      = 2018
}
@article{park2018multimodal,
  author    = {Park, Daehyung and Hoshi, Yuuna and Kemp, Charles C},
  journal   = {IEEE Robotics and Automation Letters},
  number    = 3,
  pages     = {1544--1551},
  publisher = {IEEE},
  title     = {A multimodal anomaly detector for robot-assisted feeding using an {LSTM}-based variational autoencoder},
  volume    = 3,
  year      = 2018
}
@article{eslami2018neural,
  author    = {Eslami, S M Ali and Jimenez Rezende, Danilo and Besse, Frederic and Viola, Fabio and Morcos, Ari S and Garnelo, Marta and Ruderman, Avraham and Rusu, Andrei A and Danihelka, Ivo and Gregor, Karol and others},
  journal   = {Science},
  number    = 6394,
  pages     = {1204--1210},
  publisher = {American Association for the Advancement of Science},
  title     = {Neural scene representation and rendering},
  volume    = 360,
  year      = 2018
}
@inproceedings{liang2018variational,
  author    = {Liang, Dawen and Krishnan, Rahul G and Hoffman, Matthew D and Jebara, Tony},
  booktitle = {World Wide Web Conference},
  pages     = {689--698},
  title     = {Variational autoencoders for collaborative filtering},
  year      = 2018
}
@inproceedings{zong2018deep,
  author    = {Zong, Bo and Song, Qi and Min, Martin Renqiang and Cheng, Wei and Lumezanu, Cristian and Cho, Daeki and Chen, Haifeng},
  booktitle = {International Conference on Learning Representations},
  title     = {Deep autoencoding {G}aussian mixture model for unsupervised anomaly detection},
  year      = 2018
}
@article{vahdat2018dvae,
  author  = {Vahdat, Arash and Andriyash, Evgeny and Macready, William},
  journal = {Neural Information Processing Systems},
  pages   = {1869--1878},
  title   = {{DVAE}\#: Discrete variational autoencoders with relaxed {B}oltzmann priors},
  volume  = 31,
  year    = 2018
}
@inproceedings{vahdat2018dvae++,
  author    = {Vahdat, Arash and Macready, William and Bian, Zhengbing and Khoshaman, Amir and Andriyash, Evgeny},
  booktitle = {International Conference on Machine Learning},
  pages     = {5035--5044},
  title     = {D{VAE}++: Discrete variational autoencoders with overlapping transformations},
  year      = 2018
}
@article{tolstikhin2017wasserstein,
  author  = {Tolstikhin, Ilya and Bousquet, Olivier and Gelly, Sylvain and Schoelkopf, Bernhard},
  journal = {International Conference on Learning Representations},
  title   = {Wasserstein auto-encoders},
  year    = 2018
}
@inproceedings{kim2018disentangling,
  author    = {Kim, Hyunjik and Mnih, Andriy},
  booktitle = {International Conference on Machine Learning},
  pages     = {2649--2658},
  title     = {Disentangling by factorising},
  year      = 2018
}
@article{kumar2017variational,
  author  = {Kumar, Abhishek and Sattigeri, Prasanna and Balakrishnan, Avinash},
  journal = {International Conference on Learning Representations},
  title   = {Variational inference of disentangled latent concepts from unlabeled observations},
  year    = 2018
}
@article{franccois2018introduction,
  author    = {Fran{\c{c}}ois-Lavet, Vincent and Henderson, Peter and Islam, Riashat and Bellemare, Marc G and Pineau, Joelle and others},
  journal   = {Foundations and Trends in Machine Learning},
  number    = {3-4},
  pages     = {219--354},
  publisher = {Now Publishers, Inc.},
  title     = {An introduction to deep reinforcement learning},
  volume    = 11,
  year      = 2018
}
@inproceedings{hessel2018rainbow,
  author    = {Hessel, Matteo and Modayil, Joseph and van Hasselt, Hado and Schaul, Tom and Ostrovski, Georg and Dabney, Will and Horgan, Dan and Piot, Bilal and Azar, Mohammad and Silver, David},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {3215--3222},
  title     = {Rainbow: Combining improvements in deep reinforcement learning},
  year      = 2018
}
@article{fortunato2017noisy,
  author  = {Fortunato, Meire and Azar, Mohammad Gheshlaghi and Piot, Bilal and Menick, Jacob and Osband, Ian and Graves, Alex and Mnih, Vlad and Munos, R{\'e}mi and Hassabis, Demis and Pietquin, Olivier and others},
  journal = {International Conference on Learning Representations},
  title   = {Noisy networks for exploration},
  year    = 2018
}
@inproceedings{dabney2018distributional,
  author    = {Dabney, Will and Rowland, Mark and Bellemare, Marc and Munos, R{\'e}mi},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {2892--2901},
  title     = {Distributional reinforcement learning with quantile regression},
  year      = 2018
}
@inproceedings{fujimoto2018addressing,
  author    = {Fujimoto, Scott and van Hoof, Herke and Meger, David},
  booktitle = {International Conference on Machine Learning},
  pages     = {1587--1596},
  title     = {Addressing function approximation error in actor-critic methods},
  year      = 2018
}
@inproceedings{haarnoja2018soft,
  author    = {Haarnoja, Tuomas and Zhou, Aurick and Abbeel, Pieter and Levine, Sergey},
  booktitle = {International Conference on Machine Learning},
  pages     = {1861--1870},
  title     = {Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor},
  year      = 2018
}
@article{christiano2018supervising,
  author  = {Christiano, Paul and Shlegeris, Buck and Amodei, Dario},
  journal = {arXiv:1810.08575},
  title   = {Supervising strong learners by amplifying weak experts},
  year    = 2018
}
@inproceedings{nguyen2018optimization,
  author    = {Nguyen, Quynh and Hein, Matthias},
  booktitle = {International Conference on Machine Learning},
  pages     = {3730--3739},
  title     = {Optimization landscape and expressivity of deep {CNN}s},
  year      = 2018
}
@article{li2018measuring,
  author  = {Li, Chunyuan and Farkhoor, Heerad and Liu, Rosanne and Yosinski, Jason},
  journal = {International Conference on Learning Representations},
  title   = {Measuring the Intrinsic Dimension of Objective Landscapes},
  year    = 2018
}
@article{li2018learning,
  author  = {Li, Yuanzhi and Liang, Yingyu},
  journal = {Neural Information Processing Systems},
  pages   = {8168--8177},
  title   = {Learning overparameterized neural networks via stochastic gradient descent on structured data},
  volume  = 31,
  year    = 2018
}
@inproceedings{draxler2019essentially,
  author    = {Draxler, Felix and Veschgini, Kambis and Salmhofer, Manfred and Hamprecht, Fred A.},
  booktitle = {International Conference on Machine Learning},
  pages     = {1308--1317},
  title     = {Essentially No Barriers in Neural Network Energy Landscape},
  year      = 2018
}
@inproceedings{kleinberg2018alternative,
  author    = {Kleinberg, Robert and Li, Yuanzhi and Yuan, Yang},
  booktitle = {International Conference on Machine Learning},
  pages     = {2703--2712},
  title     = {An Alternative View: When Does {SGD} Escape Local Minima?},
  year      = 2018
}
@inproceedings{huang2018data,
  author    = {Huang, Zehao and Wang, Naiyan},
  booktitle = {European Conference on Computer Vision},
  pages     = {304--320},
  title     = {Data-driven sparse structure selection for deep neural networks},
  year      = 2018
}
@article{nye2018efficient,
  author  = {Nye, Maxwell and Saxe, Andrew},
  journal = {International Conference on Learning Representations (Workshop)},
  title   = {Are efficient deep representations learnable?},
  year    = 2018
}
@inproceedings{ulyanov2018deep,
  author    = {Ulyanov, Dmitry and Vedaldi, Andrea and Lempitsky, Victor},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {9446--9454},
  title     = {Deep image prior},
  year      = 2018
}
@article{taddeo2018ai,
  author    = {Taddeo, Mariarosaria and Floridi, Luciano},
  journal   = {Science},
  number    = 6404,
  pages     = {751--752},
  publisher = {American Association for the Advancement of Science},
  title     = {How {AI} can be a force for good},
  volume    = 361,
  year      = 2018
}
@inproceedings{McNamara-2018,
  author    = {McNamara, Andrew and Smith, Justin and Murphy-Hill, Emerson},
  booktitle = {ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},
  pages     = {729--733},
  title     = {Does {ACM}'s code of ethics change ethical decision making in software development?},
  year      = 2018
}
@article{Mayson-2018,
  author  = {Mayson, Sandra G.},
  journal = {Yale Law Journal},
  number  = 8,
  pages   = {2218--2300},
  title   = {Bias In, Bias Out},
  volume  = 128,
  year    = 2019
}
@article{Buolamwini-Gebru-2018,
  author  = {Buolamwini, Joy and Gebru, Timnit},
  journal = {Proceedings of Machine Learning Research},
  pages   = {77--91},
  title   = {Gender Shades: Intersectional Accuracy Disparities in Commercial Gender Classification},
  volume  = 81,
  year    = 2018
}
@article{Awad-et-al-2018,
  author  = {Awad, E. and Dsouza, S. and Kim, R. and Schulz, J. and Henrich, J. and Shariff, A. and Bonnefon, J.-F. and Rahwan, I.},
  journal = {Nature},
  pages   = {59--64},
  title   = {The moral machine experiment},
  volume  = 563,
  year    = 2018
}
@inproceedings{Noothigattu-et-al-2018,
  author    = {Noothigattu, Ritesh and Gaikwad, Snehalkumar (Neil) and Awad, Edmond and Dsouza, Sohan and Rahwan, Iyad and Ravikumar, Pradeep and Procaccia, Ariel D.},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {1587--1594},
  title     = {A voting-based system for ethical decision making},
  year      = 2018
}
@article{Akers-et-al-2018,
  author  = {Akers, John and Bansal, Gagan and Cadamuro, Gabriel and Chen, Christine and Chen, Quanze and Lin, Lucy and Mulcaire, Phoebe and Nandakumar, Rajalakshmi and Rockett, Matthew and Simko, Lucy and Toman, John and Wu, Tongshuang and Zeng, Eric and Zorn, Bill and Roesner, Franziska},
  journal = {arXiv:1812.09383},
  title   = {Technology-Enabled Disinformation: Summary, Lessons, and Recommendations},
  year    = 2018
}
@book{Bughin-et-al-2018,
  author    = {Bughin, Jacques and Seong, Jeongmin and Manyika, James and Chui, Michael and Joshi, Raoul},
  month     = sep,
  publisher = {McKinsey Global Institute},
  title     = {Notes from the {AI} Frontier: Modelling the Impact of {AI} on the World Economy},
  year      = 2018
}
@book{Manyika-Sneader-2018,
  author    = {Manyika, James and Sneader, Kevin},
  publisher = {McKinsey Global Institute},
  title     = {{AI}, automation, and the future of work: Ten things to solve for},
  year      = 2018
}
@article{Calo-2018,
  author  = {Calo, Ryan},
  journal = {University of Bologna Law Review},
  number  = 2,
  pages   = {180--218},
  title   = {Artificial Intelligence Policy: A Primer and Roadmap},
  volume  = 3,
  year    = 2018
}
@misc{Fei-Fei-Li-2018,
  author       = {Li, Fei-Fei},
  howpublished = {The New York Times, March 7, 2018. \url{https://www.nytimes.com/2018/03/07/opinion/artificial-intelligence-human.html}},
  title        = {How to Make {A.I.} That's Good for People},
  year         = 2018
}
@misc{Knight-2018,
  author       = {Knight, Will},
  howpublished = {MIT Technology Review, Nov 20, 2018. \url{https://www.technologyreview.com/2018/11/17/66372/one-of-the-fathers-of-ai-is-worried-about-its-future/}},
  title        = {One of the fathers of {AI} is worried about its future},
  year         = 2018
}
@article{Wang-Kosinski-2018,
  author  = {Wang, Y. and Kosinski, M.},
  journal = {Journal of Personality and Social Psychology},
  number  = 2,
  pages   = {246--257},
  title   = {Deep neural networks are more accurate than humans at detecting sexual orientation from facial images},
  volume  = 114,
  year    = 2018
}
@misc{Arcas-et-al-2018,
  author       = {{Ag{\"u}era y Arcas}, Blaise and Todorov, Alexander and Mitchell, Margaret},
  howpublished = {Medium, Jan 11, 2018. \url{https://medium.com/@blaisea/do-algorithms-reveal-sexual-orientation-or-just-expose-our-stereotypes-d998fafdf477}},
  title        = {Do algorithms reveal sexual orientation or just expose our stereotypes?},
  year         = 2018
}
@book{Noble-2018,
  address   = {New York},
  author    = {Noble, Safiya},
  publisher = {NYU Press},
  title     = {Algorithms of Oppression},
  year      = 2018
}
@book{Eubanks-2018,
  address   = {New York},
  author    = {Eubanks, Virginia},
  publisher = {St. Martin's Press},
  title     = {Automating Inequality: How High-Tech Tools Profile, Police, and Punish the Poor},
  year      = 2018
}
@book{Broussard-2018,
  author    = {Broussard, Meredith},
  publisher = {The MIT Press},
  title     = {Artificial Unintelligence: How Computers Misunderstand the World},
  year      = 2018
}
@article{holland2019emotion,
  author    = {Holland, Catherine AC and Ebner, Natalie C and Lin, Tian and Samanez-Larkin, Gregory R},
  journal   = {Cognition and {E}motion},
  number    = 2,
  pages     = {245--257},
  publisher = {Taylor \& Francis},
  title     = {Emotion identification across adulthood using the {Dynamic} {FACES} database of emotional expressions in younger, middle aged, and older adults},
  volume    = 33,
  year      = 2019
}
@book{graesser2019foundations,
  author    = {Graesser, Laura and Keng, Wah Loon},
  title     = {Foundations of deep reinforcement learning},
  publisher = {Addison-Wesley Professional},
  year      = {2019}
}
@inproceedings{howard2019searching,
  author    = {Howard, Andrew and Sandler, Mark and Chu, Grace and Chen, Liang-Chieh and Chen, Bo and Tan, Mingxing and Wang, Weijun and Zhu, Yukun and Pang, Ruoming and Vasudevan, Vijay and others},
  title     = {Searching for {M}obile{N}et{V}3},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {1314--1324},
  year      = {2019}
}
@inproceedings{barron2019general,
  author    = {Barron, Jonathan T},
  title     = {A general and adaptive robust loss function},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {4331--4339},
  year      = {2019}
}
@article{he2019control,
  author  = {He, Fengxiang and Liu, Tongliang and Tao, Dacheng},
  title   = {Control batch size and learning rate to generalize well: Theoretical and empirical evidence},
  journal = {Neural Information Processing Systems},
  volume  = {32},
  pages   = {1143--1152},
  year    = {2019}
}
@inproceedings{loshchilov2017decoupled,
  author    = {Loshchilov, Ilya and Hutter, Frank},
  booktitle = {International Conference on Learning Representations},
  title     = {Decoupled weight decay regularization},
  year      = 2019
}
@article{choi2019empirical,
  author  = {Choi, Dami and Shallue, Christopher J and Nado, Zachary and Lee, Jaehoon and Maddison, Chris J and Dahl, George E},
  title   = {On empirical comparisons of optimizers for deep learning},
  journal = {arXiv:1910.05446},
  year    = {2019}
}
@inproceedings{zhang2019fixup,
  author    = {Zhang, Hongyi and Dauphin, Yann N and Ma, Tengyu},
  booktitle = {International Conference on Learning Representations},
  title     = {Fixup initialization: Residual learning without normalization},
  year      = 2019
}
@article{huang2019gpipe,
  author  = {Huang, Yanping and Cheng, Youlong and Bapna, Ankur and Firat, Orhan and Chen, Dehao and Chen, Mia and Lee, HyoukJoong and Ngiam, Jiquan and Le, Quoc V and Wu, Yonghui and others},
  title   = {G{P}ipe: {E}fficient Training of Giant Neural Networks using Pipeline Parallelism},
  journal = {Neural Information Processing Systems},
  volume  = {32},
  pages   = {103--112},
  year    = {2019}
}
@article{sohoni2019lowmemory,
  author  = {Sohoni, Nimit Sharad and Aberger, Christopher Richard and Leszczynski, Megan and Zhang, Jian and R{\'e}, Christopher},
  title   = {Low-memory neural network training: A technical report},
  journal = {arXiv:1904.10631},
  year    = {2019}
}
@article{shoeybi2020megatronlm,
  author  = {Shoeybi, Mohammad and Patwary, Mostofa and Puri, Raul and LeGresley, Patrick and Casper, Jared and Catanzaro, Bryan},
  title   = {Megatron-{LM}: {T}raining multi-billion parameter language models using model parallelism},
  journal = {arXiv:1909.08053},
  year    = {2019}
}
@article{belkin2019reconciling,
  author    = {Belkin, Mikhail and Hsu, Daniel and Ma, Siyuan and Mandal, Soumik},
  title     = {Reconciling modern machine-learning practice and the classical bias--variance trade-off},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {116},
  number    = {32},
  pages     = {15849--15854},
  publisher = {National Acad Sciences},
  year      = {2019}
}
@article{bartlett2017nearlytight,
  author  = {Bartlett, Peter L and Harvey, Nick and Liaw, Christopher and Mehrabian, Abbas},
  title   = {Nearly-tight {VC}-dimension and pseudodimension bounds for piecewise linear neural networks},
  journal = {Journal of Machine Learning Research},
  volume  = {20},
  number  = {1},
  pages   = {2285--2301},
  year    = {2019}
}
@article{fort2020deep,
  author  = {Fort, Stanislav and Hu, Huiyi and Lakshminarayanan, Balaji},
  title   = {Deep ensembles: A loss landscape perspective},
  journal = {arXiv:1912.02757},
  year    = {2019}
}
@article{liu2019beta,
  author    = {Liu, Lei and Luo, Yuhao and Shen, Xu and Sun, Mingzhai and Li, Bin},
  title     = {Beta-Dropout: A Unified Dropout},
  journal   = {IEEE Access},
  volume    = {7},
  pages     = {36140--36153},
  publisher = {IEEE},
  year      = {2019}
}
@article{muller2019does,
  author  = {M{\"u}ller, Rafael and Kornblith, Simon and Hinton, Geoffrey E},
  title   = {When does label smoothing help?},
  journal = {Neural Information Processing Systems},
  volume  = {32},
  pages   = {4696--4705},
  year    = {2019}
}
@article{chaudhari2017entropysgd,
  abstract = {This paper proposes a new optimization algorithm called Entropy-SGD for training deep neural networks that is motivated by the local geometry of the energy landscape. Local extrema with low generalization error have a large proportion of almost-zero eigenvalues in the Hessian with very few positive or negative eigenvalues. We leverage upon this observation to construct a local-entropy-based objective function that favors well-generalizable solutions lying in large flat regions of the energy landscape, while avoiding poorly-generalizable solutions located in the sharp valleys. Conceptually, our algorithm resembles two nested loops of SGD where we use Langevin dynamics in the inner loop to compute the gradient of the local entropy before each update of the weights. We show that the new objective has a smoother energy landscape and show improved generalization over SGD using uniform stability, under certain assumptions. Our experiments on convolutional and recurrent networks demonstrate that Entropy-SGD compares favorably to state-of-the-art techniques in terms of generalization error and training time.},
  author   = {Pratik Chaudhari and Anna Choromanska and Stefano Soatto and Yann LeCun and Carlo Baldassi and Christian Borgs and Jennifer Chayes and Levent Sagun and Riccardo Zecchina},
  doi      = {10.1088/1742-5468/ab39d9},
  journal  = {Journal of Statistical Mechanics: Theory and Experiment},
  number   = 12,
  pages    = 124018,
  title    = {Entropy-{SGD}: {B}iasing gradient descent into wide valleys},
  volume   = 2019,
  year     = 2019
}
@inproceedings{devlin2018bert,
  author    = {Jacob Devlin and Ming{-}Wei Chang and Kenton Lee and Kristina Toutanova},
  booktitle = {ACL Human Language Technologies},
  pages     = {4171--4186},
  title     = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  year      = 2019
}
@article{radford2019language,
  author  = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya and others},
  title   = {Language models are unsupervised multitask learners},
  journal = {OpenAI Blog},
  volume  = {1},
  number  = {8},
  pages   = {9},
  year    = {2019}
}
@inproceedings{schneider2019wav2vec,
  author    = {Steffen Schneider and Alexei Baevski and Ronan Collobert and Michael Auli},
  title     = {wav2vec: Unsupervised Pre-Training for Speech Recognition},
  booktitle = {INTERSPEECH},
  pages     = {3465--3469},
  year      = {2019}
}
@inproceedings{summers2019improved,
  author    = {Summers, Cecilia and Dinneen, Michael J},
  title     = {Improved mixed-example data augmentation},
  booktitle = {Winter Conference on Applications of Computer Vision},
  pages     = {1262--1270},
  year      = {2019}
}
@inproceedings{jackson2019style,
  author    = {Jackson, Philip TG and Abarghouei, Amir Atapour and Bonner, Stephen and Breckon, Toby P and Obara, Boguslaw},
  title     = {Style augmentation: {D}ata augmentation via style randomization},
  booktitle = {IEEE Computer Vision and Pattern Recognition Workshops},
  pages     = {10--11},
  year      = {2019}
}
@article{shorten2019survey,
  author  = {Shorten, Connor and Khoshgoftaar, Taghi M},
  title   = {A survey on image data augmentation for deep learning},
  journal = {Journal of Big Data},
  volume  = {6},
  number  = {1},
  pages   = {1--48},
  year    = {2019}
}
@inproceedings{yun2019cutmix,
  author    = {Yun, Sangdoo and Han, Dongyoon and Oh, Seong Joon and Chun, Sanghyuk and Choe, Junsuk and Yoo, Youngjoon},
  title     = {Cut{M}ix: Regularization strategy to train strong classifiers with localizable features},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {6023--6032},
  year      = {2019}
}
@inproceedings{park2019specaugment,
  author    = {Park, Daniel S and Chan, William and Zhang, Yu and Chiu, Chung-Cheng and Zoph, Barret and Cubuk, Ekin D and Le, Quoc V},
  booktitle = {INTERSPEECH},
  title     = {Spec{A}ugment: A simple data augmentation method for automatic speech recognition},
  year      = 2019
}
@inproceedings{wei2019eda,
  author    = {Wei, Jason and Zou, Kai},
  title     = {{EDA}: Easy Data Augmentation Techniques for Boosting Performance on Text Classification Tasks},
  booktitle = {ACL Empirical Methods in Natural Language Processing},
  pages     = {6382--6388},
  year      = {2019}
}
@inproceedings{yu2019free,
  author    = {Yu, Jiahui and Lin, Zhe and Yang, Jimei and Shen, Xiaohui and Lu, Xin and Huang, Thomas S},
  title     = {Free-form image inpainting with gated convolution},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {4471--4480},
  year      = {2019}
}
@inproceedings{chang2019free,
  author    = {Chang, Ya-Liang and Liu, Zhe Yu and Lee, Kuan-Ying and Hsu, Winston},
  title     = {Free-form video inpainting with 3{D} gated convolution and temporal {P}atch{GAN}},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {9066--9075},
  year      = {2019}
}
@inproceedings{zhang2019making,
  author    = {Zhang, Richard},
  title     = {Making convolutional networks shift-invariant again},
  booktitle = {International Conference on Machine Learning},
  pages     = {7324--7334},
  year      = {2019}
}
@article{eren2019generic,
  author  = {Eren, Levent and Ince, Turker and Kiranyaz, Serkan},
  title   = {A generic intelligent bearing fault diagnosis system using compact adaptive {1D CNN} classifier},
  journal = {Journal of Signal Processing Systems},
  volume  = {91},
  number  = {2},
  pages   = {179--189},
  year    = {2019}
}
@inproceedings{su2019pixel,
  author    = {Su, Hang and Jampani, Varun and Sun, Deqing and Gallo, Orazio and Learned-Miller, Erik and Kautz, Jan},
  title     = {Pixel-adaptive convolutional neural networks},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {11166--11175},
  year      = {2019}
}
@article{falk2019u,
  author  = {Falk, Thorsten and Mai, Dominic and Bensch, Robert and {\c{C}}i{\c{c}}ek, {\"O}zg{\"u}n and Abdulkadir, Ahmed and Marrakchi, Yassine and B{\"o}hm, Anton and Deubner, Jan and J{\"a}ckel, Zoe and Seiwald, Katharina and others},
  title   = {{U}-{N}et: {D}eep learning for cell counting, detection, and morphometry},
  journal = {Nature Methods},
  volume  = {16},
  number  = {1},
  pages   = {67--70},
  year    = {2019}
}
@article{yamada2019shakedrop,
  author    = {Yamada, Yoshihiro and Iwamura, Masakazu and Akiba, Takuya and Kise, Koichi},
  journal   = {IEEE Access},
  pages     = {186126--186136},
  publisher = {IEEE},
  title     = {{ShakeDrop} regularization for deep residual learning},
  volume    = 7,
  year      = 2019
}
@inproceedings{yang2019mean,
  author    = {Yang, Greg and Pennington, Jeffrey and Rao, Vinay and Sohl-Dickstein, Jascha and Schoenholz, Samuel S},
  booktitle = {International Conference on Learning Representations},
  title     = {A mean field theory of batch normalization},
  year      = 2019
}
@inproceedings{li2019exponential,
  author    = {Li, Zhiyuan and Arora, Sanjeev},
  booktitle = {International Conference on Learning Representations},
  title     = {An exponential learning rate schedule for deep learning},
  year      = 2020
}
@misc{huszar2019exponentially,
  author       = {Husz{\'a}r, Ferenc},
  title        = {Exponentially Growing Learning Rate? {I}mplications of Scale Invariance induced by Batch Normalization},
  howpublished = {\url{https://www.inference.vc/exponentially-growing-learning-rate-implications-of-scale-invariance-induced-by-BatchNorm/}},
  year         = {2019}
}
@inproceedings{zeng2019learning,
  author    = {Zeng, Yanhong and Fu, Jianlong and Chao, Hongyang and Guo, Baining},
  title     = {Learning pyramid-context encoder network for high-quality image inpainting},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {1486--1494},
  year      = {2019}
}
@inproceedings{velivckovic2017graph,
  author    = {Veli{\v{c}}kovi{\'c}, Petar and Cucurull, Guillem and Casanova, Arantxa and Romero, Adriana and Li{\`o}, Pietro and Bengio, Yoshua},
  booktitle = {International Conference on Learning Representations},
  title     = {Graph attention networks},
  year      = 2018
}
@inproceedings{wang2018glue,
  author    = {Wang, Alex and Singh, Amanpreet and Michael, Julian and Hill, Felix and Levy, Omer and Bowman, Samuel R},
  booktitle = {International Conference on Learning Representations},
  title     = {G{LUE}: A multi-task benchmark and analysis platform for natural language understanding},
  year      = 2019
}
@article{wang2019superglue,
  author  = {Wang, Alex and Pruksachatkun, Yada and Nangia, Nikita and Singh, Amanpreet and Michael, Julian and Hill, Felix and Levy, Omer and Bowman, Samuel},
  title   = {Super{GLUE}: A stickier benchmark for general-purpose language understanding systems},
  journal = {Neural Information Processing Systems},
  volume  = {32},
  pages   = {3261--3275},
  year    = {2019}
}
@inproceedings{mccoy2019right,
  author    = {McCoy, R Thomas and Pavlick, Ellie and Linzen, Tal},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {3428--3448},
  title     = {Right for the wrong reasons: Diagnosing syntactic heuristics in natural language inference},
  year      = 2019
}
@inproceedings{liu2018rethinking,
  author    = {Zhuang Liu and Mingjie Sun and Tinghui Zhou and Gao Huang and Trevor Darrell},
  booktitle = {International Conference on Learning Representations},
  title     = {Rethinking the Value of Network Pruning},
  year      = 2019
}
@inproceedings{voita2019analyzing,
  author    = {Voita, Elena and Talbot, David and Moiseev, Fedor and Sennrich, Rico and Titov, Ivan},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {5797--5808},
  title     = {Analyzing multi-head self-attention: Specialized heads do the heavy lifting, the rest can be pruned},
  year      = 2019
}
@inproceedings{wu2019pay,
  author    = {Wu, Felix and Fan, Angela and Baevski, Alexei and Dauphin, Yann N and Auli, Michael},
  booktitle = {International Conference on Learning Representations},
  title     = {Pay less attention with lightweight and dynamic convolutions},
  year      = 2019
}
@article{wei2019nezha,
  author  = {Wei, Junqiu and Ren, Xiaozhe and Li, Xiaoguang and Huang, Wenyong and Liao, Yi and Wang, Yasheng and Lin, Jiashu and Jiang, Xin and Chen, Xiao and Liu, Qun},
  title   = {N{EZHA}: Neural contextualized representation for {C}hinese language understanding},
  journal = {arXiv:1909.00204},
  year    = {2019}
}
@article{child2019generating,
  author  = {Child, Rewon and Gray, Scott and Radford, Alec and Sutskever, Ilya},
  title   = {Generating long sequences with sparse transformers},
  journal = {arXiv:1904.10509},
  year    = {2019}
}
@inproceedings{ye2019cross,
  author    = {Ye, Linwei and Rochan, Mrigank and Liu, Zhi and Wang, Yang},
  title     = {Cross-modal self-attention network for referring image segmentation},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {10502--10511},
  year      = {2019}
}
@inproceedings{sun2019videobert,
  author    = {Sun, Chen and Myers, Austin and Vondrick, Carl and Murphy, Kevin and Schmid, Cordelia},
  title     = {Video{BERT}: A joint model for video and language representation learning},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {7464--7473},
  year      = {2019}
}
@inproceedings{girdhar2019video,
  author    = {Girdhar, Rohit and Carreira, Joao and Doersch, Carl and Zisserman, Andrew},
  title     = {Video action transformer network},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {244--253},
  year      = {2019}
}
@inproceedings{su2019vl,
  author    = {Su, Weijie and Zhu, Xizhou and Cao, Yue and Li, Bin and Lu, Lewei and Wei, Furu and Dai, Jifeng},
  booktitle = {International Conference on Learning Representations},
  title     = {V{L}-{BERT}: Pre-training of generic visual-linguistic representations},
  year      = 2020
}
@inproceedings{tan2019lxmert,
  author    = {Tan, Hao and Bansal, Mohit},
  booktitle = {ACL Empirical Methods in Natural Language Processing},
  pages     = {5099--5110},
  title     = {{LXMERT}: Learning cross-modality encoder representations from transformers},
  year      = 2019
}
@article{lu2019vilbert,
  author  = {Lu, Jiasen and Batra, Dhruv and Parikh, Devi and Lee, Stefan},
  journal = {Neural Information Processing Systems},
  pages   = {13--23},
  title   = {{ViLBERT}: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks},
  volume  = 32,
  year    = 2019
}
@article{li2019visualbert,
  author  = {Li, Liunian Harold and Yatskar, Mark and Yin, Da and Hsieh, Cho-Jui and Chang, Kai-Wei},
  title   = {Visual{BERT}: A simple and performant baseline for vision and language},
  journal = {arXiv:1908.03557},
  year    = {2019}
}
@inproceedings{hu2019local,
  author    = {Hu, Han and Zhang, Zheng and Xie, Zhenda and Lin, Stephen},
  title     = {Local relation networks for image recognition},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {3464--3473},
  year      = {2019}
}
@article{ramachandran2019stand,
  author  = {Prajit Ramachandran and Niki Parmar and Ashish Vaswani and Irwan Bello and Anselm Levskaya and Jonathon Shlens},
  journal = {Neural Information Processing Systems},
  pages   = {68--80},
  title   = {Stand-alone self-attention in vision models},
  volume  = 32,
  year    = 2019
}
@article{zhang2019graph,
  author    = {Zhang, Si and Tong, Hanghang and Xu, Jiejun and Maciejewski, Ross},
  title     = {Graph convolutional networks: A comprehensive review},
  journal   = {Computational Social Networks},
  volume    = {6},
  number    = {1},
  pages     = {1--23},
  publisher = {SpringerOpen},
  year      = {2019}
}
@inproceedings{errica2019fair,
  author    = {Errica, Federico and Podda, Marco and Bacciu, Davide and Micheli, Alessio},
  booktitle = {International Conference on Learning Representations},
  title     = {A fair comparison of graph neural networks for graph classification},
  year      = 2020
}
@inproceedings{selsam2018learning,
  author    = {Selsam, Daniel and Lamm, Matthew and B{\"u}nz, Benedikt and Liang, Percy and de Moura, Leonardo and Dill, David L},
  booktitle = {International Conference on Learning Representations},
  title     = {Learning a {SAT} solver from single-bit supervision},
  year      = 2019
}
@inproceedings{chiang2019cluster,
  author    = {Chiang, Wei-Lin and Liu, Xuanqing and Si, Si and Li, Yang and Bengio, Samy and Hsieh, Cho-Jui},
  title     = {Cluster-{GCN}: An efficient algorithm for training deep and large graph convolutional networks},
  booktitle = {ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
  pages     = {257--266},
  year      = {2019}
}
@inproceedings{abu2019mixhop,
  author    = {Abu-El-Haija, Sami and Perozzi, Bryan and Kapoor, Amol and Alipourfard, Nazanin and Lerman, Kristina and Harutyunyan, Hrayr and Ver Steeg, Greg and Galstyan, Aram},
  title     = {Mix{H}op: Higher-order graph convolutional architectures via sparsified neighborhood mixing},
  booktitle = {International Conference on Machine Learning},
  pages     = {21--29},
  year      = {2019}
}
@article{zhang2019gresnet,
  author  = {Zhang, Jiawei and Meng, Lin},
  journal = {arXiv:1909.05729},
  title   = {G{R}es{N}et: Graph residual network for reviving deep {GNN}s from suspended animation},
  year    = 2019
}
@inproceedings{xu2018how,
  author    = {Keyulu Xu and Weihua Hu and Jure Leskovec and Stefanie Jegelka},
  title     = {How Powerful are Graph Neural Networks?},
  booktitle = {International Conference on Learning Representations},
  year      = {2019}
}
@article{zou2019layer,
  author  = {Zou, Difan and Hu, Ziniu and Wang, Yewen and Jiang, Song and Sun, Yizhou and Gu, Quanquan},
  title   = {Layer-dependent importance sampling for training deep and large graph convolutional networks},
  journal = {Neural Information Processing Systems},
  volume  = {32},
  pages   = {11247--11256},
  year    = {2019}
}
@inproceedings{oono2019graph,
  author    = {Oono, Kenta and Suzuki, Taiji},
  booktitle = {International Conference on Learning Representations},
  title     = {Graph neural networks exponentially lose expressive power for node classification},
  year      = 2020
}
@inproceedings{lee2019self,
  author    = {Lee, Junhyun and Lee, Inyeop and Kang, Jaewoo},
  title     = {Self-attention graph pooling},
  booktitle = {International Conference on Machine Learning},
  pages     = {3734--3743},
  year      = {2019}
}
@inproceedings{gao2019graph,
  author    = {Gao, Hongyang and Ji, Shuiwang},
  title     = {Graph {U}-{N}ets},
  booktitle = {International Conference on Machine Learning},
  pages     = {2083--2092},
  year      = {2019}
}
@article{kynkaanniemi2019improved,
  author  = {Kynk{\"a}{\"a}nniemi, Tuomas and Karras, Tero and Laine, Samuli and Lehtinen, Jaakko and Aila, Timo},
  title   = {Improved precision and recall metric for assessing generative models},
  journal = {Neural Information Processing Systems},
  volume  = {32},
  pages   = {3929--3938},
  year    = {2019}
}
@inproceedings{brock2018large,
  author    = {Brock, Andrew and Donahue, Jeff and Simonyan, Karen},
  booktitle = {International Conference on Learning Representations},
  title     = {Large scale {GAN} training for high fidelity natural image synthesis},
  year      = 2019
}
@misc{odena2019open,
  author       = {Odena, Augustus},
  doi          = {10.23915/distill.00018},
  howpublished = {Distill, \url{https://distill.pub/2019/gan-open-problems}},
  journal      = {Distill},
  title        = {Open Questions about Generative Adversarial Networks},
  year         = 2019
}
@inproceedings{karras2019style,
  author    = {Karras, Tero and Laine, Samuli and Aila, Timo},
  title     = {A style-based generator architecture for generative adversarial networks},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {4401--4410},
  year      = {2019}
}
@inproceedings{zhang2019self,
  author    = {Zhang, Han and Goodfellow, Ian and Metaxas, Dimitris and Odena, Augustus},
  title     = {Self-attention generative adversarial networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {7354--7363},
  year      = {2019}
}
@article{berard2019closer,
  author  = {Berard, Hugo and Gidel, Gauthier and Almahairi, Amjad and Vincent, Pascal and Lacoste-Julien, Simon},
  title   = {A closer look at the optimization landscapes of generative adversarial networks},
  journal = {arXiv:1906.04848},
  year    = {2019}
}
@inproceedings{jolicoeur2018relativistic,
  author    = {Jolicoeur-Martineau, Alexia},
  booktitle = {International Conference on Learning Representations},
  title     = {The relativistic discriminator: A key element missing from standard {GAN}},
  year      = 2019
}
@article{peyre2019computational,
  author    = {Peyr{\'e}, Gabriel and Cuturi, Marco},
  journal   = {Foundations and Trends in Machine Learning},
  number    = {5--6},
  pages     = {355--607},
  publisher = {Now Publishers, Inc.},
  title     = {Computational optimal transport with applications to data science},
  volume    = 11,
  year      = 2019
}
@inproceedings{kurach2019large,
  author    = {Kurach, Karol and Lu{\v{c}}i{\'c}, Mario and Zhai, Xiaohua and Michalski, Marcin and Gelly, Sylvain},
  title     = {A large-scale study on regularization and normalization in {GAN}s},
  booktitle = {International Conference on Machine Learning},
  pages     = {3581--3590},
  year      = {2019}
}
@inproceedings{bau2019seeing,
  author    = {Bau, David and Zhu, Jun-Yan and Wulff, Jonas and Peebles, William and Strobelt, Hendrik and Zhou, Bolei and Torralba, Antonio},
  title     = {Seeing what a {GAN} cannot generate},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {4502--4511},
  year      = {2019}
}
@inproceedings{mao2019mode,
  author    = {Mao, Qi and Lee, Hsin-Ying and Tseng, Hung-Yu and Ma, Siwei and Yang, Ming-Hsuan},
  title     = {Mode seeking generative adversarial networks for diverse image synthesis},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {1429--1437},
  year      = {2019}
}
@inproceedings{abdal2019image2stylegan,
  author    = {Abdal, Rameen and Qin, Yipeng and Wonka, Peter},
  title     = {Image2{S}tyle{GAN}: How to embed images into the {S}tyle{GAN} latent space?},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {4432--4441},
  year      = {2019}
}
@inproceedings{behrmann2019invertible,
  author    = {Behrmann, Jens and Grathwohl, Will and Chen, Ricky TQ and Duvenaud, David and Jacobsen, J{\"o}rn-Henrik},
  title     = {Invertible residual networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {573--582},
  year      = {2019}
}
@article{song2019mintnet,
  author  = {Song, Yang and Meng, Chenlin and Ermon, Stefano},
  title   = {Mint{N}et: Building invertible neural networks with masked convolutions},
  journal = {Neural Information Processing Systems},
  volume  = {32},
  pages   = {11002--11012},
  year    = {2019}
}
@inproceedings{brugger2019partially,
  author    = {Br{\"u}gger, Robin and Baumgartner, Christian F and Konukoglu, Ender},
  title     = {A partially reversible {U}-{N}et for memory-efficient volumetric image segmentation},
  booktitle = {International Conference on Medical Image Computing and Computer-Assisted Intervention},
  pages     = {429--437},
  year      = {2019}
}
@inproceedings{ho2019flow++,
  author    = {Ho, Jonathan and Chen, Xi and Srinivas, Aravind and Duan, Yan and Abbeel, Pieter},
  title     = {Flow++: Improving flow-based generative models with variational dequantization and architecture design},
  booktitle = {International Conference on Machine Learning},
  pages     = {2722--2730},
  year      = {2019}
}
@inproceedings{abdelhamed2019noise,
  author    = {Abdelhamed, Abdelrahman and Brubaker, Marcus A and Brown, Michael S},
  title     = {Noise flow: Noise modeling with conditional normalizing flows},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {3165--3173},
  year      = {2019}
}
@article{kumar2019videoflow,
  author  = {Kumar, Manoj and Babaeizadeh, Mohammad and Erhan, Dumitru and Finn, Chelsea and Levine, Sergey and Dinh, Laurent and Kingma, Durk},
  title   = {Video{F}low: A flow-based generative model for video},
  journal = {ICML Workshop on Invertible Neural Networks and Normalizing Flows},
  year    = {2019}
}
@article{esling2019universal,
  author  = {Esling, Philippe and Masuda, Naotake and Bardet, Adrien and Despres, Romeo and others},
  title   = {Universal audio synthesizer control with normalizing flows},
  journal = {International Conference on Digital Audio Effects},
  year    = {2019}
}
@inproceedings{prenger2019waveglow,
  author    = {Prenger, Ryan and Valle, Rafael and Catanzaro, Bryan},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  pages     = {3617--3621},
  title     = {{WaveGlow}: A flow-based generative network for speech synthesis},
  year      = 2019
}
@article{madhawa2019graphnvp,
  author  = {Madhawa, Kaushalya and Ishiguro, Katushiko and Nakago, Kosuke and Abe, Motoki},
  title   = {Graph{NVP}: An invertible flow model for generating molecular graphs},
  journal = {arXiv:1905.11600},
  year    = {2019}
}
@article{muller2019neural,
  author    = {M{\"u}ller, Thomas and McWilliams, Brian and Rousselle, Fabrice and Gross, Markus and Nov{\'a}k, Jan},
  title     = {Neural importance sampling},
  journal   = {ACM Transactions on Graphics (TOG)},
  volume    = {38},
  number    = {5},
  pages     = {1--19},
  publisher = {ACM New York, NY, USA},
  year      = {2019}
}
@article{noe2019boltzmann,
  author    = {No{\'e}, Frank and Olsson, Simon and K{\"o}hler, Jonas and Wu, Hao},
  journal   = {Science},
  number    = 6457,
  pages     = {eaaw1147},
  publisher = {American Association for the Advancement of Science},
  title     = {Boltzmann generators: Sampling equilibrium states of many-body systems with deep learning},
  volume    = 365,
  year      = 2019
}
@article{tran2019discrete,
  author  = {Tran, Dustin and Vafa, Keyon and Agrawal, Kumar and Dinh, Laurent and Poole, Ben},
  title   = {Discrete flows: Invertible generative models of discrete data},
  journal = {Neural Information Processing Systems},
  volume  = {32},
  pages   = {14692--14701},
  year    = {2019}
}
@inproceedings{ziegler2019latent,
  author    = {Ziegler, Zachary and Rush, Alexander},
  title     = {Latent normalizing flows for discrete sequences},
  booktitle = {International Conference on Machine Learning},
  pages     = {7673--7682},
  year      = {2019}
}
@inproceedings{zhou2019density,
  author    = {Zhou, Chunting and Ma, Xuezhe and Wang, Di and Neubig, Graham},
  booktitle = {ACL Human Language Technologies},
  pages     = {1588--1598},
  title     = {Density matching for bilingual word embedding},
  year      = 2019
}
@inproceedings{jin2019unsupervised,
  author    = {Jin, Lifeng and Doshi-Velez, Finale and Miller, Timothy and Schwartz, Lane and Schuler, William},
  title     = {Unsupervised learning of {PCFG}s with normalizing flow},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {2442--2452},
  year      = {2019}
}
@inproceedings{schroecker2019generative,
  author    = {Schroecker, Yannick and Vecerik, Mel and Scholz, Jonathan},
  booktitle = {International Conference on Learning Representations},
  title     = {Generative predecessor models for sample-efficient imitation learning},
  year      = 2019
}
@article{ward2019improving,
  author  = {Ward, Patrick Nadeem and Smofsky, Ariella and Bose, Avishek Joey},
  title   = {Improving exploration in soft-actor-critic with normalizing flows policies},
  journal = {ICML Workshop on Invertible Neural Networks and Normalizing Flows},
  year    = {2019}
}
@inproceedings{hoogeboom2019emerging,
  author    = {Hoogeboom, Emiel and van den Berg, Rianne and Welling, Max},
  booktitle = {International Conference on Machine Learning},
  pages     = {2771--2780},
  title     = {Emerging convolutions for generative normalizing flows},
  year      = 2019
}
@article{durkan2019cubic,
  author  = {Durkan, Conor and Bekasov, Artur and Murray, Iain and Papamakarios, George},
  journal = {ICML Workshop on Invertible Neural Networks and Normalizing Flows},
  title   = {Cubic-spline flows},
  year    = 2019
}
@article{durkan2019neural,
  author  = {Durkan, Conor and Bekasov, Artur and Murray, Iain and Papamakarios, George},
  title   = {Neural spline flows},
  journal = {Neural Information Processing Systems},
  volume  = {32},
  pages   = {7509--7520},
  year    = {2019}
}
@article{wehenkel2019unconstrained,
  author  = {Wehenkel, Antoine and Louppe, Gilles},
  title   = {Unconstrained monotonic neural networks},
  journal = {Neural Information Processing Systems},
  volume  = {32},
  pages   = {1543--1553},
  year    = {2019}
}
@inproceedings{jaini2019sum,
  author    = {Jaini, Priyank and Selby, Kira A and Yu, Yaoliang},
  title     = {Sum-of-squares polynomial flow},
  booktitle = {International Conference on Machine Learning},
  pages     = {3009--3018},
  year      = {2019}
}
@inproceedings{dinh2019rad,
  author    = {Dinh, Laurent and Sohl-Dickstein, Jascha and Larochelle, Hugo and Pascanu, Razvan},
  booktitle = {ICLR Workshop on Deep Generative Models for Highly Structured Data},
  title     = {A {RAD} approach to deep mixture models},
  year      = 2019
}
@article{Das2019DimensionalityRF,
  author  = {Das, Hari Prasanna and Abbeel, Pieter and Spanos, Costas J},
  title   = {Likelihood Contribution based Multi-scale Architecture for Generative Flows},
  journal = {arXiv:1908.01686},
  year    = {2019},
}
@inproceedings{chang2019antisymmetricrnn,
  author    = {Chang, Bo and Chen, Minmin and Haber, Eldad and Chi, Ed H},
  booktitle = {International Conference on Learning Representations},
  title     = {Antisymmetric{RNN}: A dynamical system view on recurrent neural networks},
  year      = 2019
}
@inproceedings{chen2019residual,
  author    = {Chen, Ricky TQ and Behrmann, Jens and Duvenaud, David K and Jacobsen, J{\"o}rn-Henrik},
  booktitle = {Neural Information Processing Systems},
  pages     = {9913--9923},
  title     = {Residual flows for invertible generative modeling},
  volume    = 32,
  year      = 2019
}
@inproceedings{grathwohl2018ffjord,
  author    = {Grathwohl, Will and Chen, Ricky TQ and Bettencourt, Jesse and Sutskever, Ilya and Duvenaud, David},
  booktitle = {International Conference on Learning Representations},
  title     = {{FFJORD}: Free-form continuous dynamics for scalable reversible generative models},
  year      = 2019
}
@inproceedings{dupont2019augmented,
  author    = {Dupont, Emilien and Doucet, Arnaud and Teh, Yee Whye},
  booktitle = {Neural Information Processing Systems},
  pages     = {3134--3144},
  title     = {Augmented neural {ODE}s},
  volume    = 32,
  year      = 2019
}
@article{tzen2019neural,
  author  = {Tzen, Belinda and Raginsky, Maxim},
  title   = {Neural stochastic differential equations: Deep latent {G}aussian models in the diffusion limit},
  journal = {arXiv:1905.09883},
  year    = {2019},
}
@inproceedings{hoogeboom2019integer,
  author    = {Hoogeboom, Emiel and Peters, Jorn and Van Den Berg, Rianne and Welling, Max},
  booktitle = {Neural Information Processing Systems},
  pages     = {12134--12144},
  title     = {Integer discrete flows and lossless compression},
  volume    = 32,
  year      = 2019
}
@inproceedings{wang2019riemannian,
  author    = {Wang, Prince Zizhuang and Wang, William Yang},
  booktitle = {ACL Human Language Technologies},
  pages     = {284--294},
  title     = {Riemannian normalizing flow on variational {W}asserstein autoencoder for text modeling},
  year      = 2019
}
@article{rezende2019equivariant,
  author  = {Rezende, Danilo Jimenez and Racani{\`e}re, S{\'e}bastien and Higgins, Irina and Toth, Peter},
  title   = {Equivariant {H}amiltonian flows},
  journal = {arXiv:1909.13739},
  year    = {2019},
}
@article{kingma2019introduction,
  author    = {Kingma, Diederik P and Welling, Max},
  journal   = {Foundations and Trends in Machine Learning},
  number    = 4,
  pages     = {307--392},
  publisher = {Now Publishers, Inc.},
  title     = {An introduction to variational autoencoders},
  volume    = 12,
  year      = 2019
}
@inproceedings{gregor2018temporal,
  author    = {Gregor, Karol and Papamakarios, George and Besse, Frederic and Buesing, Lars and Weber, Theophane},
  booktitle = {International Conference on Learning Representations},
  title     = {Temporal difference variational auto-encoder},
  year      = 2019
}
@inproceedings{razavi2019generating,
  author    = {Razavi, Ali and van den Oord, Aaron and Vinyals, Oriol},
  booktitle = {Neural Information Processing Systems},
  pages     = {14837--14847},
  title     = {Generating diverse high-fidelity images with {VQ-VAE}-2},
  volume    = 32,
  year      = 2019
}
@inproceedings{razavi2019preventing,
  author    = {Razavi, Ali and van den Oord, A{\"a}ron and Poole, Ben and Vinyals, Oriol},
  booktitle = {International Conference on Learning Representations},
  title     = {Preventing posterior collapse with delta-{VAE}s},
  year      = 2019
}
@inproceedings{lucas2019don,
  author    = {Lucas, James and Tucker, George and Grosse, Roger B and Norouzi, Mohammad},
  booktitle = {Neural Information Processing Systems},
  pages     = {9403--9413},
  title     = {Don't blame the {ELBO}! {A} linear {VAE} perspective on posterior collapse},
  volume    = 32,
  year      = 2019
}
@inproceedings{lucas2019understanding,
  author    = {Lucas, James and Tucker, George and Grosse, Roger and Norouzi, Mohammad},
  booktitle = {ICLR Workshop on Deep Generative Models for Highly Structured Data},
  title     = {Understanding posterior collapse in generative latent variable models},
  year      = 2019
}
@inproceedings{mathieu2019disentangling,
  author    = {Mathieu, Emile and Rainforth, Tom and Siddharth, Nana and Teh, Yee Whye},
  title     = {Disentangling disentanglement in variational autoencoders},
  booktitle = {International Conference on Machine Learning},
  pages     = {4402--4412},
  year      = {2019},
}
@inproceedings{masrani2019thermodynamic,
  author    = {Masrani, Vaden and Le, Tuan Anh and Wood, Frank},
  booktitle = {Neural Information Processing Systems},
  pages     = {11521--11530},
  title     = {The thermodynamic variational objective},
  volume    = 32,
  year      = 2019
}
@inproceedings{song2019generative,
  author    = {Song, Yang and Ermon, Stefano},
  booktitle = {Neural Information Processing Systems},
  pages     = {11895--11907},
  title     = {Generative modeling by estimating gradients of the data distribution},
  volume    = 32,
  year      = 2019
}
@article{berner2019dota,
  author  = {Berner, Christopher and Brockman, Greg and Chan, Brooke and Cheung, Vicki and D{\k{e}}biak, Przemys{\l}aw and Dennison, Christy and Farhi, David and Fischer, Quirin and Hashme, Shariq and Hesse, Chris and others},
  title   = {{DOTA} 2 with large scale deep reinforcement learning},
  journal = {arXiv:1912.06680},
  year    = {2019},
}
@inproceedings{kool2018attention,
  author    = {Kool, Wouter and van Hoof, Herke and Welling, Max},
  booktitle = {International Conference on Learning Representations},
  title     = {Attention, Learn to Solve Routing Problems!},
  year      = 2019
}
@inproceedings{fujimoto2019off,
  author    = {Fujimoto, Scott and Meger, David and Precup, Doina},
  title     = {Off-policy deep reinforcement learning without exploration},
  booktitle = {International Conference on Machine Learning},
  pages     = {2052--2062},
  year      = {2019},
}
@inproceedings{kumar2019stabilizing,
  author    = {Kumar, Aviral and Fu, Justin and Soh, Matthew and Tucker, George and Levine, Sergey},
  booktitle = {Neural Information Processing Systems},
  pages     = {11761--11771},
  title     = {Stabilizing off-policy {Q}-learning via bootstrapping error reduction},
  volume    = 32,
  year      = 2019
}
@article{aubret2019survey,
  author  = {Aubret, Arthur and Matignon, Laetitia and Hassas, Salima},
  title   = {A survey on intrinsic motivation in reinforcement learning},
  journal = {arXiv:1908.06976},
  year    = {2019},
}
@inproceedings{tan2019efficientnet,
  author    = {Tan, Mingxing and Le, Quoc},
  title     = {Efficient{N}et: Rethinking model scaling for convolutional neural networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {6105--6114},
  year      = {2019},
}
@inproceedings{cubuk2018autoaugment,
  author    = {Cubuk, Ekin D. and Zoph, Barret and Man{\'e}, Dandelion and Vasudevan, Vijay and Le, Quoc V.},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  doi       = {10.1109/CVPR.2019.00020},
  pages     = {113--123},
  title     = {{AutoAugment}: Learning Augmentation Strategies From Data},
  year      = 2019
}
@inproceedings{du2018gradient,
  author    = {Du, Simon S and Zhai, Xiyu and Poczos, Barnabas and Singh, Aarti},
  booktitle = {International Conference on Learning Representations},
  title     = {Gradient descent provably optimizes over-parameterized neural networks},
  year      = 2019
}
@inproceedings{du2019gradient,
  author    = {Simon S. Du and Jason D. Lee and Haochuan Li and Liwei Wang and Xiyu Zhai},
  title     = {Gradient Descent Finds Global Minima of Deep Neural Networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1675--1685},
  year      = {2019},
}
@inproceedings{allenzhu2019convergence,
  author    = {Zeyuan Allen-Zhu and Yuanzhi Li and Zhao Song},
  booktitle = {International Conference on Machine Learning},
  pages     = {242--252},
  title     = {A Convergence Theory for Deep Learning via Over-Parameterization},
  volume    = 97,
  year      = 2019
}
@article{kawaguchi2019effect,
  author    = {Kawaguchi, Kenji and Huang, Jiaoyang and Kaelbling, Leslie Pack},
  journal   = {Neural Computation},
  number    = 7,
  pages     = {1462--1498},
  publisher = {MIT Press},
  title     = {Effect of depth and width on local minima in deep learning},
  volume    = 31,
  year      = 2019
}
@inproceedings{frankle2018lottery,
  author    = {Frankle, Jonathan and Carbin, Michael},
  booktitle = {International Conference on Learning Representations},
  title     = {The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks},
  year      = 2019
}
@inproceedings{fort2019large,
  author    = {Stanislav Fort and Jastrz{\k{e}}bski, Stanis{\l}aw},
  title     = {Large Scale Structure of Neural Network Loss Landscapes},
  booktitle = {Neural Information Processing Systems},
  volume    = {32},
  pages     = {6706--6714},
  year      = {2019},
}
@inproceedings{fort2019goldilocks,
  author    = {Fort, Stanislav and Scherlis, Adam},
  title     = {The {G}oldilocks zone: Towards better understanding of neural network loss landscapes},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {3574--3581},
  year      = {2019},
}
@inproceedings{ilyas2019adversarial,
  author    = {Ilyas, Andrew and Santurkar, Shibani and Tsipras, Dimitris and Engstrom, Logan and Tran, Brandon and Madry, Aleksander},
  booktitle = {Neural Information Processing Systems},
  pages     = {125--136},
  title     = {Adversarial examples are not bugs, they are features},
  volume    = 32,
  year      = 2019
}
@inproceedings{liu2018darts,
  author    = {Liu, Hanxiao and Simonyan, Karen and Yang, Yiming},
  booktitle = {International Conference on Learning Representations},
  title     = {{DARTS}: Differentiable architecture search},
  year      = 2019
}
@article{Jobin-et-al-2019,
  author  = {Anna Jobin and Marcello Ienca and Effy Vayena},
  title   = {The Global Landscape of {AI} Ethics Guidelines},
  journal = {Nature Machine Intelligence},
  volume  = {1},
  pages   = {389--399},
  year    = {2019},
}
@book{Russell-2019,
  author    = {Stuart Russell},
  title     = {Human Compatible: Artificial Intelligence and the Problem of Control},
  publisher = {Viking},
  year      = {2019},
}
@article{Hubinger-et-al-2019,
  author  = {Evan Hubinger and Chris van Merwijk and Vladimir Mikulik and Joar Skalse and Scott Garrabrant},
  title   = {Risks from Learned Optimization in Advanced Machine Learning Systems},
  journal = {arXiv:1906.01820},
  year    = {2019},
}
@inproceedings{Raji-Buolamwini-2019,
  author    = {Inioluwa Deborah Raji and Joy Buolamwini},
  title     = {Actionable Auditing: Investigating the Impact of Publicly Naming Biased Performance Results of Commercial {AI} Products},
  booktitle = {AAAI/ACM Conference on AI, Ethics, and Society},
  pages     = {429--435},
  year      = {2019},
}
@article{Van-Wynsberghe-Robbins-2019,
  author  = {Aimee van Wynsberghe and Scott Robbins},
  title   = {Critiquing the Reasons for Making Artificial Moral Agents},
  journal = {Science and Engineering Ethics},
  volume  = {25},
  pages   = {719--735},
  year    = {2019},
}
@article{Cervantes-et-al-2019,
  author  = {Jos{\'e}-Antonio Cervantes and Sonia L{\'o}pez and Luis-Felipe Rodr{\'i}guez and Salvador Cervantes and Francisco Cervantes and F{\'e}lix Ramos},
  title   = {Artificial Moral Agents: A Survey of the Current Status},
  journal = {Science and Engineering Ethics},
  volume  = {26},
  pages   = {501--532},
  year    = {2019},
}
@misc{Berger-2019,
  author       = {Paul Berger},
  howpublished = {April 07, 2019. \url{https://www.wsj.com/articles/mtas-initial-foray-into-facial-recognition-at-high-speed-is-a-bust-11554642000}},
  journal      = {The Wall Street Journal},
  title        = {{MTA's} Initial Foray Into Facial Recognition at High Speed Is a Bust},
  year         = 2019
}
@article{Leuner-2019,
  author  = {John Leuner},
  journal = {arXiv:1902.10739},
  title   = {A Replication Study: Machine Learning Models are Capable of Predicting Sexual Orientation from Facial Images},
  year    = 2019
}
@inproceedings{Strubell-et-al-2019,
  author    = {Emma Strubell and Ananya Ganesh and Andrew McCallum},
  title     = {Energy and policy considerations for deep learning in {NLP}},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {3645--3650},
  year      = {2019},
}
@book{Danaher-2019,
  author    = {John Danaher},
  title     = {Automation and Utopia: Human Flourishing in a World without Work},
  publisher = {Harvard University Press},
  year      = {2019},
}
@book{Frey-2019,
  author    = {Carl Benedikt Frey},
  title     = {The Technology Trap: Capital, Labour, and Power in the Age of Automation},
  publisher = {Princeton University Press},
  year      = {2019},
}
@techreport{Kratsios-2019,
  author      = {Michael Kratsios},
  title       = {The National Artificial Intelligence Research and Development Strategic Plan: 2019 Update},
  institution = {Networking and Information Technology Research and Development},
  note        = {\url{https://www.nitrd.gov/pubs/National-AI-RD-Strategy-2019.pdf}},
  year        = {2019},
}
@article{Oconnor-Bruner-2019,
  author  = {Cailin O'Connor and Justin Bruner},
  title   = {Dynamics and Diversity in Epistemic Communities},
  journal = {Erkenntnis},
  volume  = {84},
  pages   = {101--119},
  year    = {2019},
}
@book{Benjamin-2019,
  author    = {Ruha Benjamin},
  title     = {Race After Technology: {A}bolitionist Tools for the New {J}im Code},
  publisher = {Polity},
  year      = {2019},
}
@inproceedings{Green-2019,
  author    = {Ben Green},
  title     = {``{G}ood'' Isn't Good Enough},
  booktitle = {NeurIPS Workshop on AI for Social Good},
  year      = {2019},
}
@article{kobyzev2020normalizing,
  author    = {Kobyzev, Ivan and Prince, Simon J. D. and Brubaker, Marcus A},
  journal   = {IEEE Transactions on Pattern Analysis \& Machine Intelligence},
  number    = 11,
  pages     = {3964--3979},
  publisher = {IEEE},
  title     = {Normalizing flows: {A}n introduction and review of current methods},
  volume    = 43,
  year      = 2020
}
@inproceedings{karras2020analyzing,
  author    = {Karras, Tero and Laine, Samuli and Aittala, Miika and Hellsten, Janne and Lehtinen, Jaakko and Aila, Timo},
  title     = {Analyzing and improving the image quality of {StyleGAN}},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {8110--8119},
  year      = {2020},
}
@inproceedings{brown2020language,
  author    = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and others},
  booktitle = {Neural Information Processing Systems},
  pages     = {1877--1901},
  title     = {Language models are few-shot learners},
  volume    = 33,
  year      = 2020
}
@book{deisenroth2020mathematics,
  author    = {Deisenroth, Marc Peter and Faisal, A Aldo and Ong, Cheng Soon},
  title     = {Mathematics for machine learning},
  publisher = {Cambridge University Press},
  year      = {2020},
}
@article{hamilton2020graph,
  author    = {Hamilton, William L},
  journal   = {Synthesis Lectures on Artificial Intelligence and Machine Learning},
  number    = 3,
  pages     = {1--159},
  publisher = {Morgan \& Claypool Publishers},
  title     = {Graph representation learning},
  volume    = 14,
  year      = 2020
}
@misc{kurenkov2020briefhistory,
  author       = {Kurenkov, Andrey},
  howpublished = {\url{https://www.skynettoday.com/overviews/neural-net-history}},
  journal      = {Skynet Today, September 27, 2020},
  title        = {A Brief History of Neural Nets and Deep Learning},
  year         = 2020
}
@article{qi2020on,
  author  = {Qi, Jun and Du, Jun and Siniscalchi, Sabato Marco and Ma, Xiaoli and Lee, Chin-Hui},
  journal = {IEEE Signal Processing Letters},
  pages   = {1485--1489},
  title   = {On Mean Absolute Error for Deep Neural Network Based Vector-to-Vector Regression},
  volume  = 27,
  year    = 2020
}
@article{rodrigues2018expectation,
  author    = {Rodrigues, Filipe and Pereira, Francisco C},
  title     = {Beyond expectation: Deep joint mean and quantile regression for spatiotemporal problems},
  journal   = {IEEE Transactions on Neural Networks and Learning Systems},
  volume    = {31},
  number    = {12},
  pages     = {5377--5389},
  publisher = {IEEE},
  year      = {2020},
}
@inproceedings{fan2020neural,
  author    = {Kai Fan and Bo Li and Jiayi Wang and Shiliang Zhang and Boxing Chen and Niyu Ge and Zhijie Yan},
  title     = {Neural Zero-Inflated Quality Estimation Model for Automatic Speech Recognition System},
  booktitle = {Interspeech},
  pages     = {606--610},
  year      = {2020},
}
@article{sun2020optimization,
  author    = {Sun, Ruo-Yu},
  title     = {Optimization for deep learning: An overview},
  journal   = {Journal of the Operations Research Society of China},
  volume    = {8},
  number    = {2},
  pages     = {249--294},
  publisher = {Springer},
  year      = {2020},
}
@inproceedings{huang2020improving,
  author    = {Huang, Xiao Shi and Perez, Felipe and Ba, Jimmy and Volkovs, Maksims},
  booktitle = {International Conference on Machine Learning},
  pages     = {4475--4483},
  title     = {Improving transformer optimization through better initialization},
  year      = 2020
}
@article{li2020pytorch,
  author  = {Shen Li and Yanli Zhao and Rohan Varma and Omkar Salpekar and Pieter Noordhuis and Teng Li and Adam Paszke and Jeff Smith and Brian Vaughan and Pritam Damania and Soumith Chintala},
  journal = {International Conference on Very Large Databases},
  title   = {{PyTorch} Distributed: Experiences on Accelerating Data Parallel Training},
  year    = 2020
}
@article{greydanus2020scaling,
  author  = {Greydanus, Sam},
  title   = {Scaling down deep learning},
  journal = {arXiv:2011.14439},
  year    = {2020},
}
@inproceedings{ishida2020we,
  author    = {Ishida, Takashi and Yamane, Ikko and Sakai, Tomoya and Niu, Gang and Sugiyama, Masashi},
  title     = {Do We Need Zero Training Loss after Achieving Zero Training Error?},
  booktitle = {International Conference on Machine Learning},
  pages     = {4604--4614},
  year      = {2020},
}
@book{lattimore2020bandit,
  author    = {Lattimore, Tor and Szepesv{\'a}ri, Csaba},
  title     = {Bandit algorithms},
  publisher = {Cambridge University Press},
  year      = {2020},
}
@inproceedings{smith2020generalization,
  author    = {Smith, Samuel and Elsen, Erich and De, Soham},
  title     = {On the generalization benefit of noise in stochastic gradient descent},
  booktitle = {International Conference on Machine Learning},
  pages     = {9058--9067},
  year      = {2020},
}
@inproceedings{wenzel2020hyperparameter,
  author    = {Wenzel, Florian and Snoek, Jasper and Tran, Dustin and Jenatton, Rodolphe},
  booktitle = {Neural Information Processing Systems},
  pages     = {6514--6527},
  title     = {Hyperparameter ensembles for robustness and uncertainty quantification},
  volume    = 33,
  year      = 2020
}
@inproceedings{frankle2020linear,
  author    = {Frankle, Jonathan and Dziugaite, Gintare Karolina and Roy, Daniel M. and Carbin, Michael},
  title     = {Linear Mode Connectivity and the Lottery Ticket Hypothesis},
  booktitle = {International Conference on Machine Learning},
  pages     = {3259--3269},
  year      = {2020},
}
@inproceedings{wenzel2020good,
  author    = {Wenzel, Florian and Roth, Kevin and Veeling, Bastiaan S and {\'S}wi{\k{a}}tkowski, Jakub and Tran, Linh and Mandt, Stephan and Snoek, Jasper and Salimans, Tim and Jenatton, Rodolphe and Nowozin, Sebastian},
  booktitle = {International Conference on Machine Learning},
  pages     = {10248--10259},
  title     = {How good is the {B}ayes posterior in deep neural networks really?},
  year      = 2020
}
@article{zhuang2020comprehensive,
  author    = {Zhuang, Fuzhen and Qi, Zhiyuan and Duan, Keyu and Xi, Dongbo and Zhu, Yongchun and Zhu, Hengshu and Xiong, Hui and He, Qing},
  title     = {A comprehensive survey on transfer learning},
  journal   = {Proceedings of the IEEE},
  volume    = {109},
  number    = {1},
  pages     = {43--76},
  publisher = {IEEE},
  year      = {2020},
}
@book{yang2020transfer,
  author    = {Yang, Qiang and Zhang, Yu and Dai, Wenyuan and Pan, Sinno Jialin},
  title     = {Transfer learning},
  publisher = {Cambridge University Press},
  doi       = {10.1017/9781139061773},
  year      = {2020},
}
@inproceedings{chen2020simple,
  author    = {Chen, Ting and Kornblith, Simon and Norouzi, Mohammad and Hinton, Geoffrey},
  title     = {A simple framework for contrastive learning of visual representations},
  booktitle = {International Conference on Machine Learning},
  pages     = {1597--1607},
  year      = {2020},
}
@article{jing2020self,
  author  = {Jing, Longlong and Tian, Yingli},
  title   = {Self-supervised visual feature learning with deep neural networks: A survey},
  journal = {IEEE Transactions on Pattern Analysis \& Machine Intelligence},
  volume  = {43},
  number  = {11},
  pages   = {4037--4058},
  year    = {2020},
}
@inproceedings{you2020does,
  author    = {You, Yuning and Chen, Tianlong and Wang, Zhangyang and Shen, Yang},
  title     = {When does self-supervision help graph convolutional networks?},
  booktitle = {International Conference on Machine Learning},
  pages     = {10871--10880},
  year      = {2020},
}
@inproceedings{zhong2020random,
  author    = {Zhong, Zhun and Zheng, Liang and Kang, Guoliang and Li, Shaozi and Yang, Yi},
  title     = {Random erasing data augmentation},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {13001--13008},
  year      = {2020},
}
@inproceedings{feng2020genaug,
  author    = {Feng, Steven Y. and Gangal, Varun and Kang, Dongyeop and Mitamura, Teruko and Hovy, Eduard},
  title     = {{G}en{A}ug: Data Augmentation for Finetuning Text Generators},
  booktitle = {ACL Deep Learning Inside Out},
  pages     = {29--42},
  year      = {2020},
}
@inproceedings{min2020syntactic,
  author    = {Min, Junghyun and McCoy, R Thomas and Das, Dipanjan and Pitler, Emily and Linzen, Tal},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {2339--2352},
  title     = {Syntactic data augmentation increases robustness to inference heuristics},
  year      = 2020
}
@inproceedings{qiu2020easyaug,
  author    = {Qiu, Siyuan and Xu, Binxia and Zhang, Jie and Wang, Yafang and Shen, Xiaoyu and De Melo, Gerard and Long, Chong and Li, Xiaolong},
  title     = {Easy{A}ug: An automatic textual data augmentation platform for classification tasks},
  booktitle = {Companion Proceedings of the Web Conference 2020},
  pages     = {249--252},
  year      = {2020},
}
@inproceedings{xiong2020variational,
  author    = {Xiong, Zhitong and Yuan, Yuan and Guo, Nianhui and Wang, Qi},
  title     = {Variational context-deformable convnets for indoor scene parsing},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {3992--4002},
  year      = {2020},
}
@inproceedings{sankararaman2020impact,
  author    = {Sankararaman, Karthik Abinav and De, Soham and Xu, Zheng and Huang, W Ronny and Goldstein, Tom},
  title     = {The impact of neural network overparameterization on gradient confusion and stochastic gradient descent},
  booktitle = {International Conference on Machine Learning},
  pages     = {8469--8479},
  year      = {2020},
}
@misc{luther2020why,
  author       = {Luther, Kyle},
  title        = {Why {B}atch{N}orm Causes Exploding Gradients},
  howpublished = {\url{https://kyleluther.github.io/2020/02/18/BatchNorm-exploding-gradients.html}},
  year         = {2020},
}
@inproceedings{de2020batch,
  author    = {De, Soham and Smith, Sam},
  booktitle = {Neural Information Processing Systems},
  pages     = {19964--19975},
  title     = {Batch normalization biases residual blocks towards the identity function in deep networks},
  volume    = 33,
  year      = 2020
}
@inproceedings{wang2020transformer,
  author    = {Wang, Yongqiang and Mohamed, Abdelrahman and Le, Duc and Liu, Chunxi and Xiao, Alex and Mahadeokar, Jay and Huang, Hongzhao and Tjandra, Andros and Zhang, Xiaohui and Zhang, Frank and others},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  pages     = {6874--6878},
  title     = {Transformer-based acoustic modeling for hybrid speech recognition},
  year      = 2020
}
@inproceedings{Lample2020Deep,
  author    = {Guillaume Lample and Fran{\c{c}}ois Charton},
  booktitle = {International Conference on Learning Representations},
  title     = {Deep Learning For Symbolic Mathematics},
  year      = 2020
}
@article{wu2020deep,
  author  = {Wu, Neo and Green, Bradley and Ben, Xue and O'Banion, Shawn},
  title   = {Deep transformer models for time series forecasting: The influenza prevalence case},
  journal = {arXiv:2001.08317},
  year    = {2020},
}
@inproceedings{bender2020climbing,
  author    = {Bender, Emily M and Koller, Alexander},
  title     = {Climbing towards {NLU}: On meaning, form, and understanding in the age of data},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {5185--5198},
  year      = {2020},
}
@inproceedings{provilkov2019bpe,
  author    = {Provilkov, Ivan and Emelianenko, Dmitrii and Voita, Elena},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {1882--1892},
  title     = {{BPE}-{D}ropout: Simple and effective subword regularization},
  year      = 2020
}
@inproceedings{he2020dynamic,
  author    = {He, Xuanli and Haffari, Gholamreza and Norouzi, Mohammad},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {3042--3051},
  title     = {Dynamic programming encoding for subword segmentation in neural machine translation},
  year      = 2020
}
@inproceedings{holtzman2019curious,
  author    = {Holtzman, Ari and Buys, Jan and Du, Li and Forbes, Maxwell and Choi, Yejin},
  booktitle = {International Conference on Learning Representations},
  title     = {The curious case of neural text degeneration},
  year      = 2020
}
@misc{elasri2020neural1,
  author       = {El Asri, Layla and Prince, Simon Jeremy Damion},
  howpublished = {\url{https://www.borealisai.com/research-blogs/tutorial-6-neural-natural-language-generation-decoding-algorithms/}},
  title        = {Tutorial \#6: Neural natural language generation -- decoding algorithms},
  year         = 2020
}
@inproceedings{cordonnier2019relationship,
  author    = {Cordonnier, Jean-Baptiste and Loukas, Andreas and Jaggi, Martin},
  booktitle = {International Conference on Learning Representations},
  title     = {On the relationship between self-attention and convolutional layers},
  year      = 2020
}
@inproceedings{choromanski2020rethinking,
  author    = {Choromanski, Krzysztof and Likhosherstov, Valerii and Dohan, David and Song, Xingyou and Gane, Andreea and Sarlos, Tamas and Hawkins, Peter and Davis, Jared and Mohiuddin, Afroz and Kaiser, Lukasz and others},
  booktitle = {International Conference on Learning Representations},
  title     = {Rethinking attention with {P}erformers},
  year      = 2020
}
@inproceedings{wang2020position,
  author    = {Wang, Benyou and Shang, Lifeng and Lioma, Christina and Jiang, Xin and Yang, Hao and Liu, Qun and Simonsen, Jakob Grue},
  title     = {On position embeddings in {BERT}},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
}
@article{raffel2020exploring,
  author  = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J},
  journal = {Journal of Machine Learning Research},
  number  = 140,
  pages   = {1--67},
  title   = {Exploring the limits of transfer learning with a unified text-to-text transformer},
  volume  = 21,
  year    = 2020
}
@inproceedings{huang2020improve,
  author    = {Huang, Zhiheng and Liang, Davis and Xu, Peng and Xiang, Bing},
  booktitle = {Empirical Methods in Natural Language Processing},
  title     = {Improve transformer models with better relative position embeddings},
  year      = 2020
}
@article{wang2020linformer,
  author  = {Wang, Sinong and Li, Belinda Z and Khabsa, Madian and Fang, Han and Ma, Hao},
  title   = {Linformer: Self-attention with linear complexity},
  journal = {arXiv:2006.04768},
  year    = {2020},
}
@article{beltagy2020longformer,
  author  = {Beltagy, Iz and Peters, Matthew E and Cohan, Arman},
  title   = {Longformer: The long-document transformer},
  journal = {arXiv:2004.05150},
  year    = {2020},
}
@inproceedings{ainslie2020etc,
  author    = {Joshua Ainslie and Santiago Onta{\~{n}}{\'{o}}n and Chris Alberti and Vaclav Cvicek and Zachary Fisher and Philip Pham and Anirudh Ravula and Sumit Sanghai and Qifan Wang and Li Yang},
  title     = {{ETC:} {E}ncoding Long and Structured Inputs in Transformers},
  booktitle = {ACL Empirical Methods in Natural Language Processing},
  pages     = {268--284},
  year      = {2020},
}
@inproceedings{kitaev2020reformer,
  author    = {Kitaev, Nikita and Kaiser, {\L}ukasz and Levskaya, Anselm},
  booktitle = {International Conference on Learning Representations},
  title     = {Reformer: The efficient transformer},
  year      = 2020
}
@inproceedings{tay2020sparse,
  author    = {Tay, Yi and Bahri, Dara and Yang, Liu and Metzler, Donald and Juan, Da-Cheng},
  title     = {Sparse {S}inkhorn attention},
  booktitle = {International Conference on Machine Learning},
  pages     = {9438--9447},
  year      = {2020},
}
@inproceedings{katharopoulos2020transformers,
  author    = {Katharopoulos, Angelos and Vyas, Apoorv and Pappas, Nikolaos and Fleuret, Fran{\c{c}}ois},
  title     = {Transformers are {RNN}s: Fast autoregressive transformers with linear attention},
  booktitle = {International Conference on Machine Learning},
  pages     = {5156--5165},
  year      = {2020},
}
@inproceedings{xiong2020layer,
  author    = {Xiong, Ruibin and Yang, Yunchang and He, Di and Zheng, Kai and Zheng, Shuxin and Xing, Chen and Zhang, Huishuai and Lan, Yanyan and Wang, Liwei and Liu, Tieyan},
  title     = {On layer normalization in the transformer architecture},
  booktitle = {International Conference on Machine Learning},
  pages     = {10524--10533},
  year      = {2020},
}
@inproceedings{shen2020powernorm,
  author    = {Shen, Sheng and Yao, Zhewei and Gholami, Amir and Mahoney, Michael and Keutzer, Kurt},
  title     = {Power{N}orm: Rethinking batch normalization in transformers},
  booktitle = {International Conference on Machine Learning},
  pages     = {8741--8751},
  year      = {2020},
}
@inproceedings{liu2020understanding,
  author    = {Liu, Liyuan and Liu, Xiaodong and Gao, Jianfeng and Chen, Weizhu and Han, Jiawei},
  booktitle = {Empirical Methods in Natural Language Processing},
  pages     = {5747--5763},
  title     = {Understanding the difficulty of training transformers},
  year      = 2020
}
@inproceedings{chen2020generative,
  author    = {Chen, Mark and Radford, Alec and Child, Rewon and Wu, Jeffrey and Jun, Heewoo and Luan, David and Sutskever, Ilya},
  title     = {Generative pretraining from pixels},
  booktitle = {International Conference on Machine Learning},
  pages     = {1691--1703},
  year      = {2020},
}
@inproceedings{carion2020end,
  author    = {Carion, Nicolas and Massa, Francisco and Synnaeve, Gabriel and Usunier, Nicolas and Kirillov, Alexander and Zagoruyko, Sergey},
  title     = {End-to-end object detection with transformers},
  booktitle = {European Conference on Computer Vision},
  pages     = {213--229},
  year      = {2020},
}
@inproceedings{zhu2020deformable,
  author    = {Zhu, Xizhou and Su, Weijie and Lu, Lewei and Li, Bin and Wang, Xiaogang and Dai, Jifeng},
  booktitle = {International Conference on Learning Representations},
  title     = {Deformable {DETR}: Deformable transformers for end-to-end object detection},
  year      = 2020
}
@inproceedings{yang2020learning,
  author    = {Yang, Fuzhi and Yang, Huan and Fu, Jianlong and Lu, Hongtao and Guo, Baining},
  title     = {Learning texture transformer network for image super-resolution},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {5791--5800},
  year      = {2020},
}
@inproceedings{zhao2020uctgan,
  author    = {Zhao, Lei and Mo, Qihang and Lin, Sihuan and Wang, Zhizhong and Zuo, Zhiwen and Chen, Haibo and Xing, Wei and Lu, Dongming},
  title     = {{UCTGAN}: Diverse image inpainting based on unsupervised cross-space translation},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {5741--5750},
  year      = {2020},
}
@article{wu2020visual,
  author  = {Wu, Bichen and Xu, Chenfeng and Dai, Xiaoliang and Wan, Alvin and Zhang, Peizhao and Yan, Zhicheng and Tomizuka, Masayoshi and Gonzalez, Joseph and Keutzer, Kurt and Vajda, Peter},
  title   = {Visual transformers: Token-based image representation and processing for computer vision},
  journal = {arXiv:2006.03677},
  year    = {2020},
}
@article{locatello2020object,
  author  = {Locatello, Francesco and Weissenborn, Dirk and Unterthiner, Thomas and Mahendran, Aravindh and Heigold, Georg and Uszkoreit, Jakob and Dosovitskiy, Alexey and Kipf, Thomas},
  title   = {Object-centric learning with slot attention},
  journal = {Neural Information Processing Systems},
  volume  = {33},
  pages   = {11525--11538},
  year    = {2020},
}
@inproceedings{chen2020uniter,
  author    = {Chen, Yen-Chun and Li, Linjie and Yu, Licheng and El Kholy, Ahmed and Ahmed, Faisal and Gan, Zhe and Cheng, Yu and Liu, Jingjing},
  title     = {{UNITER}: Universal image-text representation learning},
  booktitle = {European Conference on Computer Vision},
  pages     = {104--120},
  year      = {2020},
}
@inproceedings{zhao2020exploring,
  author    = {Zhao, Hengshuang and Jia, Jiaya and Koltun, Vladlen},
  title     = {Exploring self-attention for image recognition},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {10076--10085},
  year      = {2020},
}
@article{zhou2020graph,
  author    = {Zhou, Jie and Cui, Ganqu and Hu, Shengding and Zhang, Zhengyan and Yang, Cheng and Liu, Zhiyuan and Wang, Lifeng and Li, Changcheng and Sun, Maosong},
  title     = {Graph neural networks: A review of methods and applications},
  journal   = {AI Open},
  volume    = {1},
  pages     = {57--81},
  publisher = {Elsevier},
  year      = {2020},
}
@article{wu2020comprehensive,
  author  = {Wu, Zonghan and Pan, Shirui and Chen, Fengwen and Long, Guodong and Zhang, Chengqi and Yu, Philip S},
  journal = {IEEE Transactions on Neural Networks and Learning Systems},
  number  = 1,
  pages   = {4--24},
  title   = {A comprehensive survey on graph neural networks},
  volume  = 32,
  year    = 2020
}
@article{chami2020machine,
  author  = {Chami, Ines and Abu-El-Haija, Sami and Perozzi, Bryan and R{\'e}, Christopher and Murphy, Kevin},
  title   = {Machine learning on graphs: A model and comprehensive taxonomy},
  journal = {arXiv:2005.03675},
  year    = {2020},
}
@inproceedings{loukas2020what,
  author    = {Andreas Loukas},
  title     = {What graph neural networks cannot learn: {D}epth vs width},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
}
@article{tsitsulin2020graph,
  author  = {Tsitsulin, Anton and Palowitch, John and Perozzi, Bryan and M{\"u}ller, Emmanuel},
  title   = {Graph clustering with graph neural networks},
  journal = {arXiv:2006.16904},
  year    = {2020},
}
@article{mutlu2020review,
  author    = {Mutlu, Ece C and Oghaz, Toktam and Rajabi, Amirarsalan and Garibay, Ivan},
  title     = {Review on learning and extracting graph features for link prediction},
  journal   = {Machine Learning and Knowledge Extraction},
  volume    = {2},
  number    = {4},
  pages     = {672--704},
  publisher = {MDPI},
  year      = {2020},
}
@article{kumar2020link,
  author    = {Kumar, Ajay and Singh, Shashank Sheshar and Singh, Kuldeep and Biswas, Bhaskar},
  title     = {Link prediction techniques, applications, and performance: A survey},
  journal   = {Physica A: Statistical Mechanics and its Applications},
  volume    = {553},
  pages     = {124289},
  publisher = {Elsevier},
  year      = {2020},
}
@inproceedings{rossi2020sign,
  author    = {Rossi, Emanuele and Frasca, Fabrizio and Chamberlain, Ben and Eynard, Davide and Bronstein, Michael and Monti, Federico},
  booktitle = {ICML Graph Representation Learning and Beyond Workshop},
  title     = {{SIGN}: Scalable inception graph neural networks},
  year      = 2020
}
@article{zhang2020graph,
  author  = {Zhang, Jiawei and Zhang, Haopeng and Xia, Congying and Sun, Li},
  title   = {Graph-{B}ert: Only attention is needed for learning graph representations},
  journal = {arXiv:2001.05140},
  year    = {2020},
}
@inproceedings{zeng2019graphsaint,
  author    = {Zeng, Hanqing and Zhou, Hongkuan and Srivastava, Ajitesh and Kannan, Rajgopal and Prasanna, Viktor},
  booktitle = {International Conference on Learning Representations},
  title     = {Graph{SAINT}: Graph sampling based inductive learning method},
  year      = 2020
}
@inproceedings{rozemberczki2020little,
  author    = {Rozemberczki, Benedek and Kiss, Oliver and Sarkar, Rik},
  title     = {Little ball of fur: A {P}ython library for graph sampling},
  booktitle = {ACM International Conference on Information \& Knowledge Management},
  pages     = {3133--3140},
  year      = {2020},
}
@inproceedings{Rong2020DropEdge,
  author    = {Yu Rong and Wenbing Huang and Tingyang Xu and Junzhou Huang},
  title     = {Drop{E}dge: Towards Deep Graph Convolutional Networks on Node Classification},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
}
@inproceedings{teru2020inductive,
  author    = {Teru, Komal and Denis, Etienne and Hamilton, Will},
  title     = {Inductive relation prediction by subgraph reasoning},
  booktitle = {International Conference on Machine Learning},
  pages     = {9448--9457},
  year      = {2020},
}
@inproceedings{hasanzadeh2020bayesian,
  author    = {Hasanzadeh, Arman and Hajiramezanali, Ehsan and Boluki, Shahin and Zhou, Mingyuan and Duffield, Nick and Narayanan, Krishna and Qian, Xiaoning},
  title     = {Bayesian graph neural networks with adaptive connection sampling},
  booktitle = {International Conference on Machine Learning},
  pages     = {4094--4104},
  year      = {2020},
}
@inproceedings{zhao2019pairnorm,
  author    = {Zhao, Lingxiao and Akoglu, Leman},
  booktitle = {International Conference on Learning Representations},
  title     = {Pair{N}orm: Tackling oversmoothing in {GNN}s},
  year      = 2020
}
@article{zhou2020towards,
  author  = {Zhou, Kaixiong and Huang, Xiao and Li, Yuening and Zha, Daochen and Chen, Rui and Hu, Xia},
  title   = {Towards deeper graph neural networks with differentiable group normalization},
  journal = {Neural Information Processing Systems},
  volume  = {33},
  pages   = {4917--4928},
  year    = {2020},
}
@article{li2020deepergcn,
  author  = {Li, Guohao and Xiong, Chenxin and Thabet, Ali and Ghanem, Bernard},
  title   = {Deeper{GCN}: All you need to train deeper {GCN}s},
  journal = {arXiv:2006.07739},
  year    = {2020},
}
@inproceedings{gong2020geometrically,
  author    = {Gong, Shunwang and Bahri, Mehdi and Bronstein, Michael M and Zafeiriou, Stefanos},
  title     = {Geometrically principled connections in graph neural networks},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {11415--11424},
  year      = {2020},
}
@inproceedings{chen2020simple2,
  author    = {Chen, Ming and Wei, Zhewei and Huang, Zengfeng and Ding, Bolin and Li, Yaliang},
  title     = {Simple and deep graph convolutional networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1725--1735},
  year      = {2020},
}
@article{ho2020denoising,
  author  = {Ho, Jonathan and Jain, Ajay and Abbeel, Pieter},
  title   = {Denoising diffusion probabilistic models},
  journal = {Neural Information Processing Systems},
  volume  = {33},
  pages   = {6840--6851},
  year    = {2020},
}
@inproceedings{farnia2020gans,
  author    = {Farnia, Farzan and Ozdaglar, Asuman},
  title     = {Do {GAN}s always have {N}ash equilibria?},
  booktitle = {International Conference on Machine Learning},
  pages     = {3029--3039},
  year      = {2020},
}
@inproceedings{jin2020local,
  author    = {Jin, Chi and Netrapalli, Praneeth and Jordan, Michael},
  title     = {What is local optimality in nonconvex-nonconcave minimax optimization?},
  booktitle = {International Conference on Machine Learning},
  pages     = {4880--4889},
  year      = {2020},
}
@article{qi2020loss,
  author    = {Qi, Guo-Jun},
  title     = {Loss-sensitive generative adversarial networks on {L}ipschitz densities},
  journal   = {International Journal of Computer Vision},
  volume    = {128},
  number    = {5},
  pages     = {1118--1140},
  publisher = {Springer},
  year      = {2020},
}
@article{sinha2020top,
  author  = {Samarth Sinha and Zhengli Zhao and Anirudh Goyal and Colin Raffel and Augustus Odena},
  title   = {Top-k training of {GAN}s: Improving {GAN} performance by throwing away bad samples},
  journal = {Neural Information Processing Systems},
  volume  = {33},
  pages   = {14638--14649},
  year    = {2020},
}
@misc{chintala2020how,
  author       = {Chintala, Soumith and Denton, Emily and Arjovsky, Martin and Mathieu, Michael},
  howpublished = {\url{https://github.com/soumith/ganhacks}},
  title        = {How to Train a {GAN}? {T}ips and tricks to make {GAN}s work},
  year         = 2020
}
@article{karras2020training,
  author  = {Karras, Tero and Aittala, Miika and Hellsten, Janne and Laine, Samuli and Lehtinen, Jaakko and Aila, Timo},
  title   = {Training generative adversarial networks with limited data},
  journal = {Neural Information Processing Systems},
  volume  = {33},
  pages   = {12104--12114},
  year    = {2020},
}
@inproceedings{collins2020editing,
  author    = {Collins, Edo and Bala, Raja and Price, Bob and S{\"u}sstrunk, Sabine},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {5771--5780},
  title     = {Editing in style: Uncovering the local semantics of {GAN}s},
  year      = 2020
}
@article{harkonen2020ganspace,
  author  = {H{\"a}rk{\"o}nen, Erik and Hertzmann, Aaron and Lehtinen, Jaakko and Paris, Sylvain},
  title   = {{GANS}pace: Discovering interpretable {GAN} controls},
  journal = {Neural Information Processing Systems},
  volume  = {33},
  pages   = {9841--9850},
  year    = {2020},
}
@inproceedings{shen2020interpreting,
  author    = {Shen, Yujun and Gu, Jinjin and Tang, Xiaoou and Zhou, Bolei},
  title     = {Interpreting the latent space of {GAN}s for semantic face editing},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {9243--9252},
  year      = {2020},
}
@inproceedings{tewari2020stylerig,
  author    = {Tewari, Ayush and Elgharib, Mohamed and Bharaj, Gaurav and Bernard, Florian and Seidel, Hans-Peter and P{\'e}rez, Patrick and Zollh{\"o}fer, Michael and Theobalt, Christian},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {6142--6151},
  title     = {Style{R}ig: Rigging {S}tyle{GAN} for 3{D} control over portrait images},
  year      = 2020
}
@article{guan2020collaborative,
  author  = {Guan, Shanyan and Tai, Ying and Ni, Bingbing and Zhu, Feida and Huang, Feiyue and Yang, Xiaokang},
  title   = {Collaborative learning for faster {S}tyle{GAN} embedding},
  journal = {arXiv:2007.01758},
  year    = {2020},
}
@inproceedings{zhu2020domain,
  author    = {Zhu, Jiapeng and Shen, Yujun and Zhao, Deli and Zhou, Bolei},
  title     = {In-domain {GAN} inversion for real image editing},
  booktitle = {European Conference on Computer Vision},
  pages     = {592--608},
  year      = {2020},
}
@inproceedings{abdal2020image2stylegan++,
  author    = {Abdal, Rameen and Qin, Yipeng and Wonka, Peter},
  title     = {Image2{S}tyle{GAN}++: How to edit the embedded images?},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {8296--8305},
  year      = {2020},
}
@inproceedings{etmann2020iunets,
  author    = {Etmann, Christian and Ke, Rihuan and Sch{\"o}nlieb, Carola-Bibiane},
  booktitle = {IEEE International Workshop on Machine Learning for Signal Processing},
  title     = {{iUNets}: Fully invertible {U}-{N}ets with learnable up- and downsampling},
  year      = 2020
}
@article{yu2020wavelet,
  author  = {Yu, Jason J and Derpanis, Konstantinos G and Brubaker, Marcus A},
  title   = {Wavelet flow: Fast training of high resolution normalizing flows},
  journal = {Neural Information Processing Systems},
  volume  = {33},
  pages   = {6184--6196},
  year    = {2020},
}
@inproceedings{ardizzone2020conditional,
  author    = {Ardizzone, Lynton and Kruse, Jakob and L{\"u}th, Carsten and Bracher, Niels and Rother, Carsten and K{\"o}the, Ullrich},
  title     = {Conditional invertible neural networks for diverse image-to-image translation},
  booktitle = {DAGM German Conference on Pattern Recognition},
  pages     = {373--387},
  year      = {2020},
}
@article{kanwar2020equivariant,
  author    = {Kanwar, Gurtej and Albergo, Michael S and Boyda, Denis and Cranmer, Kyle and Hackett, Daniel C and Racani{\`e}re, S{\'e}bastien and Rezende, Danilo Jimenez and Shanahan, Phiala E},
  journal   = {Physical Review Letters},
  number    = 12,
  pages     = 121601,
  publisher = {APS},
  title     = {Equivariant flow-based sampling for lattice gauge theory},
  volume    = 125,
  year      = 2020
}
@inproceedings{kohler2020equivariant,
  author    = {K{\"o}hler, Jonas and Klein, Leon and No{\'e}, Frank},
  title     = {Equivariant flows: Exact likelihood generative learning for symmetric densities},
  booktitle = {International Conference on Machine Learning},
  pages     = {5361--5370},
  year      = {2020},
}
@article{wirnsberger2020targeted,
  author    = {Wirnsberger, Peter and Ballard, Andrew J and Papamakarios, George and Abercrombie, Stuart and Racani{\`e}re, S{\'e}bastien and Pritzel, Alexander and Rezende, Danilo Jimenez and Blundell, Charles},
  journal   = {The Journal of Chemical Physics},
  number    = 14,
  pages     = 144112,
  publisher = {AIP Publishing LLC},
  title     = {Targeted free energy estimation via learned mappings},
  volume    = 153,
  year      = 2020
}
@article{wong2020gravitational,
  author    = {Wong, Kaze W K and Contardo, Gabriella and Ho, Shirley},
  journal   = {Physical Review D},
  number    = 12,
  pages     = 123005,
  publisher = {APS},
  title     = {Gravitational-wave population inference with deep flow-based generative network},
  volume    = 101,
  year      = 2020
}
@inproceedings{mazoure2020leveraging,
  author    = {Mazoure, Bogdan and Doan, Thang and Durand, Audrey and Pineau, Joelle and Hjelm, R Devon},
  title     = {Leveraging exploration in off-policy algorithms via normalizing flows},
  booktitle = {Conference on Robot Learning},
  pages     = {430--444},
  year      = {2020},
}
@inproceedings{touati2020randomized,
  author    = {Touati, Ahmed and Satija, Harsh and Romoff, Joshua and Pineau, Joelle and Vincent, Pascal},
  title     = {Randomized value functions via multiplicative normalizing flows},
  booktitle = {Uncertainty in Artificial Intelligence},
  pages     = {422--432},
  year      = {2020},
}
@inproceedings{finlay2020train,
  author    = {Chris Finlay and J{\"{o}}rn{-}Henrik Jacobsen and Levon Nurbekyan and Adam M. Oberman},
  title     = {How to Train Your Neural {ODE:} {T}he World of {J}acobian and Kinetic Regularization},
  booktitle = {International Conference on Machine Learning},
  pages     = {3154--3164},
  year      = {2020},
}
@inproceedings{peluchetti2020infinitely,
  author    = {Peluchetti, Stefano and Favaro, Stefano},
  title     = {Infinitely deep neural networks as diffusion processes},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  pages     = {1126--1136},
  year      = {2020},
}
@inproceedings{jaini2019tails,
  author    = {Priyank Jaini and Ivan Kobyzev and Yaoliang Yu and Marcus A. Brubaker},
  title     = {Tails of {L}ipschitz Triangular Flows},
  booktitle = {International Conference on Machine Learning},
  pages     = {4673--4681},
  year      = {2020},
}
@article{vahdat2020nvae,
  author  = {Vahdat, Arash and Kautz, Jan},
  title   = {N{VAE}: A deep hierarchical variational autoencoder},
  journal = {Neural Information Processing Systems},
  volume  = {33},
  pages   = {19667--19679},
  year    = {2020},
}
@inproceedings{xu2020variational,
  author    = {Xu, Peng and Cheung, Jackie Chi Kit and Cao, Yanshuai},
  title     = {On variational learning of controllable representations for text without supervision},
  booktitle = {International Conference on Machine Learning},
  pages     = {10534--10543},
  year      = {2020},
}
@inproceedings{vahdat2020undirected,
  author    = {Vahdat, Arash and Andriyash, Evgeny and Macready, William},
  title     = {Undirected graphical models as approximate posteriors},
  booktitle = {International Conference on Machine Learning},
  pages     = {9680--9689},
  year      = {2020},
}
@article{lucke2020evidence,
  author  = {L{\"u}cke, J{\"o}rg and Forster, Dennis and Dai, Zhenwen},
  title   = {The Evidence Lower Bound of Variational Autoencoders Converges to a Sum of Three Entropies},
  journal = {arXiv:2010.14860},
  year    = {2020},
}
@article{song2020improved,
  author  = {Song, Yang and Ermon, Stefano},
  title   = {Improved techniques for training score-based generative models},
  journal = {Neural Information Processing Systems},
  volume  = {33},
  pages   = {12438--12448},
  year    = {2020},
}
@inproceedings{agarwal2020optimistic,
  author    = {Agarwal, Rishabh and Schuurmans, Dale and Norouzi, Mohammad},
  title     = {An optimistic perspective on offline reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {104--114},
  year      = {2020},
}
@article{kumar2020conservative,
  author  = {Kumar, Aviral and Zhou, Aurick and Tucker, George and Levine, Sergey},
  title   = {Conservative {Q}-learning for offline reinforcement learning},
  journal = {Neural Information Processing Systems},
  volume  = {33},
  pages   = {1179--1191},
  year    = {2020},
}
@article{stiennon2020learning,
  author  = {Stiennon, Nisan and Ouyang, Long and Wu, Jeffrey and Ziegler, Daniel and Lowe, Ryan and Voss, Chelsea and Radford, Alec and Amodei, Dario and Christiano, Paul F},
  title   = {Learning to summarize with human feedback},
  journal = {Neural Information Processing Systems},
  volume  = {33},
  pages   = {3008--3021},
  year    = {2020},
}
@article{schrittwieser2020mastering,
  author    = {Schrittwieser, Julian and Antonoglou, Ioannis and Hubert, Thomas and Simonyan, Karen and Sifre, Laurent and Schmitt, Simon and Guez, Arthur and Lockhart, Edward and Hassabis, Demis and Graepel, Thore and others},
  title     = {Mastering {A}tari, {G}o, chess and shogi by planning with a learned model},
  journal   = {Nature},
  volume    = {588},
  number    = {7839},
  pages     = {604--609},
  publisher = {Nature Publishing Group},
  year      = {2020},
}
@article{sejnowski2020unreasonable,
  abstract  = {Deep learning networks have been trained to recognize speech, caption photographs, and translate text between languages at high levels of performance. Although applications of deep learning networks to real-world problems have become ubiquitous, our understanding of why they are so effective is lacking. These empirical results should not be possible according to sample complexity in statistics and nonconvex optimization theory. However, paradoxes in the training and effectiveness of deep learning networks are being investigated and insights are being found in the geometry of high-dimensional spaces. A mathematical theory of deep learning would illuminate how they function, allow us to assess the strengths and weaknesses of different network architectures, and lead to major improvements. Deep learning has provided natural ways for humans to communicate with digital devices and is foundational for building artificial general intelligence. Deep learning was inspired by the architecture of the cerebral cortex and insights into autonomy and general intelligence may be found in other brain regions that are essential for planning and survival, but major breakthroughs will be needed to achieve these goals.},
  author    = {Sejnowski, Terrence J.},
  doi       = {10.1073/pnas.1907373117},
  issn      = {0027-8424},
  journal   = {Proceedings of the National Academy of Sciences},
  number    = 48,
  pages     = {30033--30038},
  publisher = {National Academy of Sciences},
  title     = {The unreasonable effectiveness of deep learning in artificial intelligence},
  volume    = 117,
  year      = 2020
}
@article{zou2018stochastic,
  author  = {Difan Zou and Yuan Cao and Dongruo Zhou and Quanquan Gu},
  title   = {Gradient Descent Optimizes Over-parameterized Deep {R}e{LU} Networks},
  journal = {Machine Learning},
  volume  = {109},
  pages   = {467--492},
  year    = {2020},
}
@article{bahri2020statistical,
  author    = {Bahri, Yasaman and Kadmon, Jonathan and Pennington, Jeffrey and Schoenholz, Sam S and Sohl-Dickstein, Jascha and Ganguli, Surya},
  title     = {Statistical mechanics of deep learning},
  journal   = {Annual Review of Condensed Matter Physics},
  volume    = {11},
  pages     = {501--528},
  publisher = {Annual Reviews},
  year      = {2020},
}
@article{d2020underspecification,
  author  = {D'Amour, Alexander and Heller, Katherine and Moldovan, Dan and Adlam, Ben and Alipanahi, Babak and Beutel, Alex and Chen, Christina and Deaton, Jonathan and Eisenstein, Jacob and Hoffman, Matthew D and others},
  journal = {Journal of Machine Learning Research},
  pages   = {1--61},
  title   = {Underspecification presents challenges for credibility in modern machine learning},
  year    = 2020
}
@article{tomavsev2020ai,
  author  = {Toma{\v{s}}ev, Nenad and Cornebise, Julien and Hutter, Frank and Mohamed, Shakir and Picciariello, Angela and Connelly, Bec and Belgrave, Danielle C M and Ezer, Daphne and {Cachat van der Haert}, Fanny and Mugisha, Frank and others},
  journal = {Nature Communications},
  number  = 1,
  pages   = 2468,
  title   = {{AI} for social good: {U}nlocking the opportunity for positive impact},
  volume  = 11,
  year    = 2020
}
@article{Hagendorff-2019,
  author  = {Thilo Hagendorff},
  title   = {The ethics of {AI} ethics: {A}n evaluation of guidelines},
  journal = {Minds and Machines},
  volume  = {30},
  number  = {1},
  pages   = {99--120},
  year    = {2020},
}
@book{Christian-2020,
  author    = {Brian Christian},
  title     = {The Alignment Problem: Machine Learning and Human Values},
  publisher = {W. W. Norton},
  year      = {2020},
}
@article{Gabriel-2020,
  author  = {Iason Gabriel},
  title   = {Artificial Intelligence, Values, and Alignment},
  journal = {Minds and Machines},
  volume  = {30},
  pages   = {411--437},
  year    = {2020},
}
@inproceedings{menon2020pulse,
  author    = {Sachit Menon and Alexandru Damian and Shijia Hu and Nikhil Ravi and Cynthia Rudin},
  title     = {{PULSE:} Self-Supervised Photo Upsampling via Latent Space Exploration of Generative Models},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {2434--2442},
  year      = {2020},
}
@misc{vincent2020what,
  author       = {Vincent, James},
  howpublished = {The Verge, June 23, 2020. \url{https://www.theverge.com/21298762/face-depixelizer-ai-machine-learning-tool-pulse-stylegan-obama-bias}},
  title        = {What a machine learning tool that turns {O}bama white can (and can't) tell us about {AI} bias / A striking image that only hints at a much bigger problem},
  year         = 2020
}
@article{Evans-et-al-2022,
  author  = {Evans, K. and de Moura, N. and Chauvier, S. and Chatila, R. and Dogan, E.},
  title   = {Ethical decision making in autonomous vehicles: the {AV} ethics project},
  journal = {Science and Engineering Ethics},
  volume  = {26},
  number  = {6},
  pages   = {3285--3312},
  year    = {2020},
}
@article{Tolmeijer-et-al-2020,
  author  = {Suzanne Tolmeijer and Markus Kneer and Cristina Sarasua and Markus Christen and Abraham Bernstein},
  title   = {Implementations in Machine Ethics: A Survey},
  journal = {ACM Computing Surveys},
  volume  = {53},
  number  = {6},
  pages   = {1--38},
  year    = {2020},
}
@article{Creel-2020,
  author  = {Kathleen A. Creel},
  title   = {Transparency in Complex Computational Systems},
  journal = {Philosophy of Science},
  volume  = {87},
  number  = {4},
  pages   = {568--589},
  year    = {2020},
}
@article{Barrett-2020,
  author  = {Lindsey Barrett},
  title   = {Ban Facial Recognition Technologies for Children --- And for Everyone Else},
  journal = {Boston University Journal of Science and Technology Law},
  volume  = {26},
  number  = {2},
  pages   = {223--285},
  year    = {2020},
}
@inproceedings{Raji-Fried-2021,
  author    = {Inioluwa Deborah Raji and Genevieve Fried},
  title     = {About Face: A Survey of Facial Recognition Evaluation},
  booktitle = {AAAI Workshop on AI Evaluation},
  year      = {2020},
}
@article{Fetscherin-et-al-2020,
  author  = {Marc Fetscherin and Stacey Tantleff-Dunn and Arne Klumb},
  title   = {Effects of facial features and styling elements on perceptions of competence, warmth, and hireability of male professionals},
  journal = {The Journal of Social Psychology},
  volume  = {160},
  number  = {3},
  pages   = {332--345},
  year    = {2020},
}
@techreport{Sisson-et-al-2020,
  author      = {Melanie Sisson and Jennifer Spindel and Paul Scharre and Vadim Kozyulin},
  institution = {United Nations Office for Disarmament Affairs},
  title       = {The Militarization of Artificial Intelligence},
  year        = 2020
}
@article{Mireshghallah-et-al-2020,
  author  = {Fatemehsadat Mireshghallah and Mohammadkazem Taram and Praneeth Vepakomma and Abhishek Singh and Ramesh Raskar and Hadi Esmaeilzadeh},
  title   = {Privacy in Deep Learning: A Survey},
  journal = {arXiv:2004.12254},
  year    = {2020},
}
@article{Boulemtafes-et-al-2020,
  author  = {Amine Boulemtafes and Abdelouahid Derhab and Yacine Challal},
  title   = {A Review of Privacy-preserving Techniques for Deep Learning},
  journal = {Neurocomputing},
  volume  = {384},
  pages   = {21--45},
  year    = {2020},
}
@inproceedings{Strubell-et-al-2020,
  author    = {Emma Strubell and Ananya Ganesh and Andrew McCallum},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {13693--13696},
  title     = {Energy and Policy Considerations for Modern Deep Learning Research},
  year      = 2020
}
@misc{Riedl-2020,
  author       = {Mark Riedl},
  title        = {{AI} Democratization in the Era of {GPT-3}},
  howpublished = {The Gradient, Sept 25, 2020. \url{https://thegradient.pub/ai-democratization-in-the-era-of-gpt-3/}},
  year         = {2020},
}
@article{Ahmed-et-al-2020,
  author  = {Shakeel Ahmed and Ravi S. Mula and Soma S. Dhavala},
  title   = {A Framework for Democratizing {AI}},
  journal = {arXiv:2001.00818},
  year    = {2020},
}
@book{Costanza-Chock-2020,
  address   = {Cambridge, MA},
  author    = {Sasha Costanza-Chock},
  publisher = {The MIT Press},
  title     = {Design Justice: Community-Led Practices to Build the Worlds We Need},
  year      = 2020
}
@book{Veliz-2020,
  author    = {Carissa V{\'e}liz},
  title     = {Privacy is Power: Why and How You Should Take Back Control of Your Data},
  publisher = {Bantam Press},
  year      = {2020},
}
@misc{dao2021awful,
  author       = {Dao, David},
  title        = {{\em {A}wful {AI}}},
  howpublished = {Github. {R}etrieved {J}anuary 17, 2023. \url{https://github.com/daviddao/awful-ai}},
  year         = {2021},
}
@inproceedings{liu2021variance,
  author    = {Liyuan Liu and Haoming Jiang and Pengcheng He and Weizhu Chen and Xiaodong Liu and Jianfeng Gao and Jiawei Han},
  booktitle = {International Conference on Learning Representations},
  title     = {On the Variance of the Adaptive Learning Rate and Beyond},
  year      = 2021
}
@article{zhu2021gradinit,
  author  = {Zhu, Chen and Ni, Renkun and Xu, Zheng and Kong, Kezhi and Huang, W Ronny and Goldstein, Tom},
  title   = {Grad{I}nit: Learning to initialize neural networks for stable and efficient training},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {16410--16422},
  year    = {2021},
}
@inproceedings{xu2021optimizing,
  author    = {Xu, Peng and Kumar, Dhruv and Yang, Wei and Zi, Wenjie and Tang, Keyi and Huang, Chenyang and Cheung, Jackie Chi Kit and Prince, Simon Jeremy Damion and Cao, Yanshuai},
  booktitle = {Meeting of the Association for Computational Linguistics},
  title     = {Optimizing deeper transformers on small datasets},
  year      = 2021
}
@inproceedings{narayanan2021memoryefficient,
  author    = {Narayanan, Deepak and Phanishayee, Amar and Shi, Kaiyu and Chen, Xie and Zaharia, Matei},
  title     = {Memory-efficient pipeline-parallel {DNN} training},
  booktitle = {International Conference on Machine Learning},
  pages     = {7937--7947},
  year      = {2021},
}
@inproceedings{narayanan2021efficient,
  author    = {Narayanan, Deepak and Shoeybi, Mohammad and Casper, Jared and LeGresley, Patrick and Patwary, Mostofa and Korthikanti, Vijay and Vainbrand, Dmitri and Kashinkunti, Prethvi and Bernauer, Julie and Catanzaro, Bryan and others},
  title     = {Efficient large-scale language model training on {GPU} clusters using {M}egatron-{LM}},
  booktitle = {International Conference for High Performance Computing, Networking, Storage and Analysis},
  pages     = {1--15},
  year      = {2021},
}
@article{nakkiran2019deep,
  author    = {Nakkiran, Preetum and Kaplun, Gal and Bansal, Yamini and Yang, Tristan and Barak, Boaz and Sutskever, Ilya},
  title     = {Deep double descent: {W}here bigger models and more data hurt},
  journal   = {Journal of Statistical Mechanics: Theory and Experiment},
  volume    = {2021},
  number    = {12},
  pages     = {124003},
  publisher = {IOP Publishing},
  year      = {2021},
}
@article{buschjager2021doubledescent,
  author  = {Buschj{\"a}ger, Sebastian and Morik, Katharina},
  title   = {There is no Double-Descent in Random Forests},
  journal = {arXiv:2111.04409},
  year    = {2021},
}
@article{bubeck2021universal,
  author  = {Bubeck, S{\'e}bastien and Sellke, Mark},
  title   = {A universal law of robustness via isoperimetry},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {28811--28822},
  year    = {2021},
}
@article{dar2021farewell,
  author  = {Dar, Yehuda and Muthukumar, Vidya and Baraniuk, Richard G},
  title   = {A farewell to the bias-variance tradeoff? {A}n overview of the theory of overparameterized machine learning},
  journal = {arXiv:2109.02355},
  year    = {2021},
}
@article{schwarz2021powerpropagation,
  author  = {Schwarz, Jonathan and Jayakumar, Siddhant and Pascanu, Razvan and Latham, Peter and Teh, Yee},
  title   = {Powerpropagation: A sparsity inducing weight reparameterisation},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {28889--28903},
  year    = {2021},
}
@article{gouk2018regularisation,
  author  = {Gouk, Henry and Frank, Eibe and Pfahringer, Bernhard and Cree, Michael J.},
  journal = {Machine Learning},
  number  = 2,
  pages   = {393--416},
  title   = {Regularisation of Neural Networks by Enforcing {L}ipschitz Continuity},
  volume  = 110,
  year    = 2021
}
@inproceedings{barrett2021implicit,
  author    = {David G. T. Barrett and Benoit Dherin},
  booktitle = {International Conference on Learning Representations},
  title     = {Implicit Gradient Regularization},
  year      = 2021
}
@inproceedings{smith2021origin,
  author    = {Samuel L. Smith and Benoit Dherin and David G. T. Barrett and Soham De},
  title     = {On the Origin of Implicit Regularization in Stochastic Gradient Descent},
  booktitle = {International Conference on Learning Representations},
  year      = {2021},
}
@inproceedings{jastrzebski2021catastrophic,
  author    = {Jastrz{\k{e}}bski, Stanis{\l}aw and Arpit, Devansh and {\AA}strand, Oliver and Kerg, Giancarlo B and Wang, Huan and Xiong, Caiming and Socher, Richard and Cho, Kyunghyun and Geras, Krzysztof J},
  booktitle = {International Conference on Machine Learning},
  pages     = {4772--4784},
  title     = {Catastrophic {F}isher explosion: Early phase {F}isher matrix impacts generalization},
  year      = 2021
}
@article{cohen2021gradient,
  author  = {Cohen, Jeremy M and Kaur, Simran and Li, Yuanzhi and Kolter, J Zico and Talwalkar, Ameet},
  title   = {Gradient descent on neural networks typically occurs at the edge of stability},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@article{noci2021disentangling,
  author  = {Noci, Lorenzo and Roth, Kevin and Bachmann, Gregor and Nowozin, Sebastian and Hofmann, Thomas},
  title   = {Disentangling the Roles of Curation, Data-Augmentation and the Prior in the Cold Posterior Effect},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {12738--12748},
  year    = {2021},
}
@article{bousselham2021efficient,
  author  = {Bousselham, Walid and Thibault, Guillaume and Pagano, Lucas and Machireddy, Archana and Gray, Joe and Chang, Young Hwan and Song, Xubo},
  title   = {Efficient Self-Ensemble Framework for Semantic Segmentation},
  journal = {arXiv:2111.13280},
  year    = {2021},
}
@article{kiranyaz20211d,
  author  = {Kiranyaz, Serkan and Avci, Onur and Abdeljaber, Osama and Ince, Turker and Gabbouj, Moncef and Inman, Daniel J},
  title   = {{1D} convolutional neural networks and applications: A survey},
  journal = {Mechanical Systems and Signal Processing},
  volume  = {151},
  pages   = {107398},
  year    = {2021},
}
@article{pintea2021resolution,
  author    = {Pintea, Silvia L and T{\"o}men, Nergis and Goes, Stanley F and Loog, Marco and van Gemert, Jan C},
  title     = {Resolution learning in deep convolutional networks using scale-space theory},
  journal   = {IEEE Transactions on Image Processing},
  volume    = {30},
  pages     = {8342--8353},
  publisher = {IEEE},
  year      = {2021},
}
@article{romero2021flexconv,
  author  = {Romero, David W and Bruintjes, Robert-Jan and Tomczak, Jakub M and Bekkers, Erik J and Hoogendoorn, Mark and van Gemert, Jan C},
  title   = {Flex{C}onv: Continuous kernel convolutions with differentiable kernel sizes},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@article{minaee2021image,
  author    = {Minaee, Shervin and Boykov, Yuri Y and Porikli, Fatih and Plaza, Antonio J and Kehtarnavaz, Nasser and Terzopoulos, Demetri},
  title     = {Image segmentation using deep learning: A survey},
  journal   = {IEEE Transactions on Pattern Analysis \& Machine Intelligence},
  volume    = {44},
  number    = {7},
  pages     = {3523--3542},
  publisher = {IEEE},
  year      = {2021},
}
@article{wightman2021resnet,
  author  = {Wightman, Ross and Touvron, Hugo and J{\'e}gou, Herv{\'e}},
  title   = {Res{N}et strikes back: An improved training procedure in timm},
  journal = {Neural Information Processing Systems Workshop},
  year    = {2021},
}
@article{lubana2021beyond,
  author  = {Lubana, Ekdeep S and Dick, Robert and Tanaka, Hidenori},
  title   = {Beyond {B}atch{N}orm: {T}owards a unified understanding of normalization in deep learning},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {4778--4791},
  year    = {2021},
}
@inproceedings{hayou2021stable,
  author    = {Hayou, Soufiane and Clerico, Eugenio and He, Bobby and Deligiannidis, George and Doucet, Arnaud and Rousseau, Judith},
  title     = {Stable {R}es{N}et},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  pages     = {1324--1332},
  year      = {2021},
}
@article{siddique2021u,
  author  = {Siddique, Nahian and Paheding, Sidike and Elkin, Colin P and Devabhaktuni, Vijay},
  journal = {IEEE Access},
  pages   = {82031--82057},
  title   = {U-{N}et and its variants for medical image segmentation: A review of theory and applications},
  volume  = 9,
  year    = 2021
}
@inproceedings{liu2021swin,
  author    = {Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining},
  title     = {Swin transformer: Hierarchical vision transformer using shifted windows},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {10012--10022},
  year      = {2021},
}
@article{narang2021transformer,
  author  = {Narang, Sharan and Chung, Hyung Won and Tay, Yi and Fedus, William and Fevry, Thibault and Matena, Michael and Malkan, Karishma and Fiedel, Noah and Shazeer, Noam and Lan, Zhenzhong and others},
  title   = {Do transformer modifications transfer across implementations and applications?},
  journal = {Empirical Methods in Natural Language Processing},
  pages   = {5758--5773},
  year    = {2021},
}
@article{dosovitskiy2020image,
  author  = {Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and others},
  title   = {An image is worth 16x16 words: Transformers for image recognition at scale},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@article{rives2021biological,
  author    = {Rives, Alexander and Meier, Joshua and Sercu, Tom and Goyal, Siddharth and Lin, Zeming and Liu, Jason and Guo, Demi and Ott, Myle and Zitnick, C Lawrence and Ma, Jerry and others},
  journal   = {Proceedings of the National Academy of Sciences},
  number    = 15,
  pages     = {e2016239118},
  publisher = {National Acad Sciences},
  title     = {Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences},
  volume    = 118,
  year      = 2021
}
@article{rae2021scaling,
  author  = {Rae, Jack W and Borgeaud, Sebastian and Cai, Trevor and Millican, Katie and Hoffmann, Jordan and Song, Francis and Aslanides, John and Henderson, Sarah and Ring, Roman and Young, Susannah and others},
  title   = {Scaling language models: Methods, analysis \& insights from training {G}opher},
  journal = {arXiv:2112.11446},
  year    = {2021},
}
@article{ribeiro2020beyond,
  author  = {Ribeiro, Marco Tulio and Wu, Tongshuang and Guestrin, Carlos and Singh, Sameer},
  journal = {International Joint Conference on Artificial Intelligence},
  pages   = {4824--4828},
  title   = {Beyond accuracy: Behavioral testing of {NLP} models with {C}heck{L}ist},
  year    = 2021
}
@inproceedings{bowman2021will,
  author    = {Samuel R. Bowman and George E. Dahl},
  title     = {What Will it Take to Fix Benchmarking in Natural Language Understanding?},
  booktitle = {ACL Human Language Technologies},
  pages     = {4843--4855},
  year      = {2021},
}
@article{dehghani2021benchmark,
  author  = {Dehghani, Mostafa and Tay, Yi and Gritsenko, Alexey A and Zhao, Zhe and Houlsby, Neil and Diaz, Fernando and Metzler, Donald and Vinyals, Oriol},
  title   = {The benchmark lottery},
  journal = {arXiv:2107.07002},
  year    = {2021},
}
@article{zhai2021attention,
  author  = {Shuangfei Zhai and Walter Talbott and Nitish Srivastava and Chen Huang and Hanlin Goh and Ruixiang Zhang and Josh Susskind},
  journal = {arXiv:2105.14103},
  title   = {An Attention Free Transformer},
  year    = 2021
}
@inproceedings{tay2021synthesizer,
  author    = {Tay, Yi and Bahri, Dara and Metzler, Donald and Juan, Da-Cheng and Zhao, Zhe and Zheng, Che},
  title     = {Synthesizer: Rethinking self-attention for transformer models},
  booktitle = {International Conference on Machine Learning},
  pages     = {10183--10192},
  year      = {2021},
}
@inproceedings{schlag2021linear,
  author    = {Schlag, Imanol and Irie, Kazuki and Schmidhuber, J{\"u}rgen},
  title     = {Linear transformers are secretly fast weight programmers},
  booktitle = {International Conference on Machine Learning},
  pages     = {9355--9366},
  year      = {2021},
}
@article{ramsauer2020hopfield,
  author  = {Ramsauer, Hubert and Sch{\"a}fl, Bernhard and Lehner, Johannes and Seidl, Philipp and Widrich, Michael and Adler, Thomas and Gruber, Lukas and Holzleitner, Markus and Pavlovi{\'c}, Milena and Sandve, Geir Kjetil and others},
  title   = {Hopfield networks is all you need},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@misc{prince2021transext,
  author       = {Prince, Simon Jeremy Damion},
  title        = {Transformers {II}: {E}xtensions},
  howpublished = {\url{https://www.borealisai.com/en/blog/tutorial-16-transformers-ii-extensions/}},
  year         = {2021},
}
@article{ke2020rethinking,
  author  = {Ke, Guolin and He, Di and Liu, Tie-Yan},
  title   = {Rethinking positional encoding in language pre-training},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@article{he2020deberta,
  author  = {He, Pengcheng and Liu, Xiaodong and Gao, Jianfeng and Chen, Weizhu},
  title   = {De{BERT}a: Decoding-enhanced {BERT} with disentangled attention},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@article{su2021roformer,
  author  = {Su, Jianlin and Lu, Yu and Pan, Shengfeng and Wen, Bo and Liu, Yunfeng},
  journal = {arXiv:2104.09864},
  title   = {Ro{F}ormer: Enhanced transformer with rotary position embedding},
  year    = 2021
}
@article{dufter2021position,
  author  = {Dufter, Philipp and Schmitt, Martin and Sch{\"u}tze, Hinrich},
  title   = {Position information in transformers: An overview},
  journal = {Computational Linguistics},
  pages   = {1--31},
  year    = {2021},
}
@article{roy2021efficient,
  author    = {Roy, Aurko and Saffar, Mohammad and Vaswani, Ashish and Grangier, David},
  title     = {Efficient content-based sparse attention with routing transformers},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {9},
  pages     = {53--68},
  publisher = {MIT Press},
  year      = {2021},
}
@inproceedings{bachlechner2021rezero,
  author    = {Bachlechner, Thomas and Majumder, Bodhisattwa Prasad and Mao, Henry and Cottrell, Gary and McAuley, Julian},
  title     = {Re{Z}ero is all you need: Fast convergence at large depth},
  booktitle = {Uncertainty in Artificial Intelligence},
  pages     = {1352--1361},
  year      = {2021},
}
@misc{prince2021transtrain,
  author       = {Prince, Simon Jeremy Damion},
  title        = {Transformers {III}: {T}raining},
  howpublished = {\url{https://www.borealisai.com/en/blog/tutorial-17-transformers-iii-training/}},
  year         = {2021},
}
@inproceedings{touvron2021training,
  author    = {Touvron, Hugo and Cord, Matthieu and Douze, Matthijs and Massa, Francisco and Sablayrolles, Alexandre and J{\'e}gou, Herv{\'e}},
  title     = {Training data-efficient image transformers \& distillation through attention},
  booktitle = {International Conference on Machine Learning},
  pages     = {10347--10357},
  year      = {2021},
}
@article{fang2021you,
  author  = {Fang, Yuxin and Liao, Bencheng and Wang, Xinggang and Fang, Jiemin and Qi, Jiyang and Wu, Rui and Niu, Jianwei and Liu, Wenyu},
  title   = {You only look at one sequence: Rethinking transformer in vision through object detection},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {26183--26197},
  year    = {2021},
}
@article{xie2021segformer,
  author  = {Xie, Enze and Wang, Wenhai and Yu, Zhiding and Anandkumar, Anima and Alvarez, Jose M and Luo, Ping},
  title   = {Seg{F}ormer: Simple and efficient design for semantic segmentation with transformers},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {12077--12090},
  year    = {2021},
}
@inproceedings{chen2021pre,
  author    = {Chen, Hanting and Wang, Yunhe and Guo, Tianyu and Xu, Chang and Deng, Yiping and Liu, Zhenhua and Ma, Siwei and Xu, Chunjing and Xu, Chao and Gao, Wen},
  title     = {Pre-trained image processing transformer},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {12299--12310},
  year      = {2021},
}
@inproceedings{nash2021generating,
  author    = {Nash, Charlie and Menick, Jacob and Dieleman, Sander and Battaglia, Peter W},
  booktitle = {International Conference on Machine Learning},
  pages     = {7958--7968},
  title     = {Generating images with sparse representations},
  year      = 2021
}
@inproceedings{wan2021high,
  author    = {Wan, Ziyu and Zhang, Jingbo and Chen, Dongdong and Liao, Jing},
  title     = {High-fidelity pluralistic image completion with transformers},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {4692--4701},
  year      = {2021},
}
@article{zheng2021tfill,
  author  = {Zheng, Chuanxia and Cham, Tat-Jen and Cai, Jianfei},
  title   = {T{F}ill: Image completion via a transformer-based architecture},
  journal = {arXiv:2104.00845},
  year    = {2021},
}
@article{kumar2021colorization,
  author  = {Kumar, Manoj and Weissenborn, Dirk and Kalchbrenner, Nal},
  title   = {Colorization transformer},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@article{schuhmann2021laion,
  author  = {Schuhmann, Christoph and Vencu, Richard and Beaumont, Romain and Kaczmarczyk, Robert and Mullis, Clayton and Katta, Aarush and Coombes, Theo and Jitsev, Jenia and Komatsuzaki, Aran},
  journal = {NeurIPS Workshop on Data-centric AI},
  title   = {{LAION}-400{M}: Open dataset of {CLIP}-filtered 400 million image-text pairs},
  year    = 2021
}
@inproceedings{fan2021multiscale,
  author    = {Fan, Haoqi and Xiong, Bo and Mangalam, Karttikeya and Li, Yanghao and Yan, Zhicheng and Malik, Jitendra and Feichtenhofer, Christoph},
  title     = {Multiscale vision transformers},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {6824--6835},
  year      = {2021},
}
@inproceedings{wang2021pyramid,
  author    = {Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling},
  title     = {Pyramid vision transformer: A versatile backbone for dense prediction without convolutions},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {568--578},
  year      = {2021},
}
@article{ali2021xcit,
  author  = {Ali, Alaaeldin and Touvron, Hugo and Caron, Mathilde and Bojanowski, Piotr and Douze, Matthijs and Joulin, Armand and Laptev, Ivan and Neverova, Natalia and Synnaeve, Gabriel and Verbeek, Jakob and others},
  title   = {X{C}i{T}: Cross-covariance image transformers},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {20014--20027},
  year    = {2021},
}
@article{chu2021twins,
  author  = {Chu, Xiangxiang and Tian, Zhi and Wang, Yuqing and Zhang, Bo and Ren, Haibing and Wei, Xiaolin and Xia, Huaxia and Shen, Chunhua},
  title   = {Twins: Revisiting the design of spatial attention in vision transformers},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {9355--9366},
  year    = {2021},
}
@inproceedings{arnab2021vivit,
  author    = {Arnab, Anurag and Dehghani, Mostafa and Heigold, Georg and Sun, Chen and Lu{\v{c}}i{\'c}, Mario and Schmid, Cordelia},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {6836--6846},
  title     = {Vi{V}i{T}: A video vision transformer},
  year      = 2021
}
@inproceedings{bertasius2021space,
  author    = {Bertasius, Gedas and Wang, Heng and Torresani, Lorenzo},
  booktitle = {International Conference on Machine Learning},
  pages     = {813--824},
  title     = {Is space-time attention all you need for video understanding?},
  year      = 2021
}
@inproceedings{neimark2021video,
  author    = {Neimark, Daniel and Bar, Omri and Zohar, Maya and Asselmann, Dotan},
  title     = {Video transformer network},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {3163--3172},
  year      = {2021},
}
@article{patrick2021keeping,
  author  = {Patrick, Mandela and Campbell, Dylan and Asano, Yuki and Misra, Ishan and Metze, Florian and Feichtenhofer, Christoph and Vedaldi, Andrea and Henriques, Jo{\~a}o F},
  title   = {Keeping your eye on the ball: Trajectory attention in video transformers},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {12493--12506},
  year    = {2021},
}
@inproceedings{radford2021learning,
  author    = {Radford, Alec and Kim, Jong Wook and Hallacy, Chris and Ramesh, Aditya and Goh, Gabriel and Agarwal, Sandhini and Sastry, Girish and Askell, Amanda and Mishkin, Pamela and Clark, Jack and others},
  title     = {Learning transferable visual models from natural language supervision},
  booktitle = {International Conference on Machine Learning},
  pages     = {8748--8763},
  year      = {2021},
}
@inproceedings{ramesh2021zero,
  author    = {Ramesh, Aditya and Pavlov, Mikhail and Goh, Gabriel and Gray, Scott and Voss, Chelsea and Radford, Alec and Chen, Mark and Sutskever, Ilya},
  title     = {Zero-shot text-to-image generation},
  booktitle = {International Conference on Machine Learning},
  pages     = {8821--8831},
  year      = {2021},
}
@misc{sanchez2021gentle,
  author       = {Sanchez-Lengeling, Benjamin and Reif, Emily and Pearce, Adam and Wiltschko, Alexander B.},
  title        = {A Gentle Introduction to Graph Neural Networks},
  journal      = {Distill},
  howpublished = {Distill, \url{https://distill.pub/2021/gnn-intro/}},
  year         = {2021},
}
@misc{daigavane2021understanding,
  author       = {Daigavane, Ameya and Balaraman, Ravindran and Aggarwal, Gaurav},
  title        = {Understanding Convolutions on Graphs},
  journal      = {Distill},
  howpublished = {Distill, \url{https://distill.pub/2021/understanding-gnns/}},
  year         = {2021},
}
@book{ma2021deep,
  author    = {Yao Ma and Jiliang Tang},
  title     = {Deep learning on graphs},
  publisher = {Cambridge University Press},
  year      = {2021},
}
@article{bronstein2021geometric,
  author  = {Bronstein, Michael M and Bruna, Joan and Cohen, Taco and Veli{\v{c}}kovi{\'c}, Petar},
  title   = {Geometric deep learning: Grids, groups, graphs, geodesics, and gauges},
  journal = {arXiv:2104.13478},
  year    = {2021},
}
@article{wolfe2021gist,
  author  = {Wolfe, Cameron R and Yang, Jingkang and Chowdhury, Arindam and Dun, Chen and Bayer, Artun and Segarra, Santiago and Kyrillidis, Anastasios},
  title   = {{GIST}: Distributed training for large-scale graph convolutional networks},
  journal = {NeurIPS Workshop on New Frontiers in Graph Learning},
  year    = {2021},
}
@inproceedings{cai2021graphnorm,
  author    = {Cai, Tianle and Luo, Shengjie and Xu, Keyulu and He, Di and Liu, Tie-Yan and Wang, Liwei},
  booktitle = {International Conference on Machine Learning},
  pages     = {1204--1215},
  title     = {Graph{N}orm: A principled approach to accelerating graph neural network training},
  year      = 2021
}
@article{alon2020bottleneck,
  author  = {Alon, Uri and Yahav, Eran},
  title   = {On the bottleneck of graph neural networks and its practical implications},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@article{li2021deepgcns,
  author    = {Li, Guohao and M{\"u}ller, Matthias and Qian, Guocheng and Perez, Itzel Carolina Delgadillo and Abualshour, Abdulellah and Thabet, Ali Kassem and Ghanem, Bernard},
  title     = {Deep{GCN}s: Making {GCN}s go as deep as {CNN}s},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  publisher = {IEEE},
  year      = {2021},
}
@inproceedings{xu2021optimization,
  author    = {Xu, Keyulu and Zhang, Mozhi and Jegelka, Stefanie and Kawaguchi, Kenji},
  title     = {Optimization of graph neural networks: Implicit acceleration by skip connections and more depth},
  booktitle = {International Conference on Machine Learning},
  pages     = {11592--11602},
  year      = {2021},
}
@inproceedings{li2021training,
  author    = {Li, Guohao and M{\"u}ller, Matthias and Ghanem, Bernard and Koltun, Vladlen},
  title     = {Training graph neural networks with 1000 layers},
  booktitle = {International Conference on Machine Learning},
  pages     = {6437--6449},
  year      = {2021},
}
@misc{wolf2018how,
  author       = {Wolf, Sarah},
  howpublished = {\url{https://towardsdatascience.com/progan-how-nvidia-generated-images-of-unprecedented-quality-51c98ec2cbd2}},
  title        = {Pro{GAN}: How {NVIDIA} Generated Images of Unprecedented Quality},
  year         = 2018
}
@article{gui2021review,
  author    = {Gui, Jie and Sun, Zhenan and Wen, Yonggang and Tao, Dacheng and Ye, Jieping},
  title     = {A review on generative adversarial networks: Algorithms, theory, and applications},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  publisher = {IEEE},
  year      = {2021},
}
@article{park2021review,
  author    = {Park, Sung-Wook and Ko, Jae-Sub and Huh, Jun-Ho and Kim, Jong-Chan},
  title     = {Review on generative adversarial networks: {F}ocusing on computer vision and its applications},
  journal   = {Electronics},
  volume    = {10},
  number    = {10},
  pages     = {1216},
  publisher = {MDPI},
  year      = {2021},
}
@article{karras2021alias,
  author  = {Karras, Tero and Aittala, Miika and Laine, Samuli and H{\"a}rk{\"o}nen, Erik and Hellsten, Janne and Lehtinen, Jaakko and Aila, Timo},
  title   = {Alias-free generative adversarial networks},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {852--863},
  year    = {2021},
}
@article{abdal2021styleflow,
  author    = {Abdal, Rameen and Zhu, Peihao and Mitra, Niloy J and Wonka, Peter},
  title     = {Style{F}low: Attribute-conditioned exploration of {S}tyle{GAN}-generated images using conditional continuous normalizing flows},
  journal   = {ACM Transactions on Graphics (ToG)},
  volume    = {40},
  number    = {3},
  pages     = {1--21},
  publisher = {ACM New York, NY},
  year      = {2021},
}
@inproceedings{patashnik2021styleclip,
  author    = {Patashnik, Or and Wu, Zongze and Shechtman, Eli and Cohen-Or, Daniel and Lischinski, Dani},
  title     = {Style{CLIP}: Text-driven manipulation of {S}tyle{GAN} imagery},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {2085--2094},
  year      = {2021},
}
@inproceedings{wu2021stylespace,
  author    = {Wu, Zongze and Lischinski, Dani and Shechtman, Eli},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {12863--12872},
  title     = {Style{S}pace analysis: Disentangled controls for {S}tyle{GAN} image generation},
  year      = 2021
}
@inproceedings{esser2021taming,
  author    = {Patrick Esser and Robin Rombach and Bj{\"{o}}rn Ommer},
  title     = {Taming Transformers for High-Resolution Image Synthesis},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {12873--12883},
  year      = {2021},
}
@inproceedings{richardson2021encoding,
  author    = {Richardson, Elad and Alaluf, Yuval and Patashnik, Or and Nitzan, Yotam and Azar, Yaniv and Shapiro, Stav and Cohen-Or, Daniel},
  title     = {Encoding in style: {A} {S}tyle{GAN} encoder for image-to-image translation},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {2287--2296},
  year      = {2021},
}
@article{ling2021editgan,
  author  = {Ling, Huan and Kreis, Karsten and Li, Daiqing and Kim, Seung Wook and Torralba, Antonio and Fidler, Sanja},
  title   = {Edit{GAN}: High-precision semantic image editing},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {16331--16345},
  year    = {2021},
}
@article{papamakarios2021normalizing,
  author  = {Papamakarios, George and Nalisnick, Eric T and Rezende, Danilo Jimenez and Mohamed, Shakir and Lakshminarayanan, Balaji},
  title   = {Normalizing Flows for Probabilistic Modeling and Inference},
  journal = {Journal of Machine Learning Research},
  volume  = {22},
  number  = {57},
  pages   = {1--64},
  year    = {2021},
}
@inproceedings{kim2021quality,
  author    = {Kim, Insoo and Han, Seungju and Baek, Ji-won and Park, Seong-Jin and Han, Jae-Joon and Shin, Jinwoo},
  title     = {Quality-agnostic image recognition via invertible decoder},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {12257--12266},
  year      = {2021},
}
@inproceedings{mackowiak2021generative,
  author    = {Mackowiak, Radek and Ardizzone, Lynton and K{\"o}the, Ullrich and Rother, Carsten},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {2971--2981},
  title     = {Generative classifiers as a basis for trustworthy image classification},
  year      = 2021
}
@inproceedings{lu2021large,
  author    = {Lu, Shao-Ping and Wang, Rong and Zhong, Tao and Rosin, Paul L},
  title     = {Large-capacity image steganography based on invertible neural networks},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {10816--10825},
  year      = {2021},
}
@inproceedings{wolf2021deflow,
  author    = {Wolf, Valentin and Lugmayr, Andreas and Danelljan, Martin and Van Gool, Luc and Timofte, Radu},
  title     = {De{F}low: Learning complex image degradations from unpaired data with conditional flows},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {94--103},
  year      = {2021},
}
@inproceedings{liang2021flow,
  author    = {Liang, Jingyun and Zhang, Kai and Gu, Shuhang and Van Gool, Luc and Timofte, Radu},
  title     = {Flow-based kernel prior with application to blind super-resolution},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {10601--10610},
  year      = {2021},
}
@inproceedings{an2021artflow,
  author    = {An, Jie and Huang, Siyu and Song, Yibing and Dou, Dejing and Liu, Wei and Luo, Jiebo},
  title     = {Art{F}low: Unbiased image style transfer via reversible neural flows},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {862--871},
  year      = {2021},
}
@inproceedings{wen2021autoregressive,
  author    = {Wen, Yu-Hui and Yang, Zhipeng and Fu, Hongbo and Gao, Lin and Sun, Yanan and Liu, Yong-Jin},
  title     = {Autoregressive stylized motion synthesis with generative flow},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {13612--13621},
  year      = {2021},
}
@inproceedings{paschalidou2021neural,
  author    = {Paschalidou, Despoina and Katharopoulos, Angelos and Geiger, Andreas and Fidler, Sanja},
  title     = {Neural parts: Learning expressive 3{D} shape abstractions with invertible neural networks},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {3204--3215},
  year      = {2021},
}
@inproceedings{zhang2021ivpf,
  author    = {Zhang, Shifeng and Zhang, Chen and Kang, Ning and Li, Zhenguo},
  title     = {i{VPF}: Numerical invertible volume preserving flow for efficient lossless compression},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {620--629},
  year      = {2021},
}
@inproceedings{xing2021invertible,
  author    = {Xing, Yazhou and Qian, Zian and Chen, Qifeng},
  title     = {Invertible image signal processing},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {6287--6296},
  year      = {2021},
}
@inproceedings{liu2021invertible,
  author    = {Liu, Yang and Qin, Zhenyue and Anwar, Saeed and Ji, Pan and Kim, Dongwoo and Caldwell, Sabrina and Gedeon, Tom},
  title     = {Invertible denoising network: A light solution for real noise removal},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {13365--13374},
  year      = {2021},
}
@article{yu2021fastflow,
  author  = {Yu, Jiawei and Zheng, Ye and Wang, Xiang and Li, Wei and Wu, Yushuang and Zhao, Rui and Wu, Liwei},
  title   = {Fast{F}low: Unsupervised anomaly detection and localization via 2{D} normalizing flows},
  journal = {arXiv:2111.07677},
  year    = {2021},
}
@inproceedings{yang2021mol2image,
  author    = {Yang, Karren and Goldman, Samuel and Jin, Wengong and Lu, Alex X and Barzilay, Regina and Jaakkola, Tommi and Uhler, Caroline},
  title     = {Mol2{I}mage: Improved Conditional Flow Models for Molecule to Image Synthesis},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {6688--6698},
  year      = {2021},
}
@inproceedings{kruse2021hint,
  author    = {Kruse, Jakob and Detommaso, Gianluca and K{\"o}the, Ullrich and Scheichl, Robert},
  title     = {{HINT}: Hierarchical invertible neural transport for density estimation and {B}ayesian inference},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {8191--8199},
  year      = {2021},
}
@article{dhariwal2021diffusion,
  author  = {Dhariwal, Prafulla and Nichol, Alexander},
  title   = {Diffusion models beat {GAN}s on image synthesis},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {8780--8794},
  year    = {2021},
}
@article{meng2021sdedit,
  author  = {Meng, Chenlin and Song, Yang and Song, Jiaming and Wu, Jiajun and Zhu, Jun-Yan and Ermon, Stefano},
  title   = {{SDE}dit: Image synthesis and editing with stochastic differential equations},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@article{song2021solving,
  author  = {Song, Yang and Shen, Liyue and Xing, Lei and Ermon, Stefano},
  title   = {Solving inverse problems in medical imaging with score-based generative models},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@inproceedings{zhou20213d,
  author    = {Zhou, Linqi and Du, Yilun and Wu, Jiajun},
  title     = {3{D} shape generation and completion through point-voxel diffusion},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {5826--5835},
  year      = {2021},
}
@inproceedings{luo2021diffusion,
  author    = {Luo, Shitong and Hu, Wei},
  title     = {Diffusion probabilistic models for 3{D} point cloud generation},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {2837--2845},
  year      = {2021},
}
@article{austin2021structured,
  author  = {Austin, Jacob and Johnson, Daniel D and Ho, Jonathan and Tarlow, Daniel and van den Berg, Rianne},
  title   = {Structured denoising diffusion models in discrete state-spaces},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {17981--17993},
  year    = {2021},
}
@article{hoogeboom2021argmax,
  author  = {Hoogeboom, Emiel and Nielsen, Didrik and Jaini, Priyank and Forr{\'e}, Patrick and Welling, Max},
  title   = {Argmax flows and multinomial diffusion: Learning categorical distributions},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {12454--12465},
  year    = {2021},
}
@article{kong2020diffwave,
  author  = {Kong, Zhifeng and Ping, Wei and Huang, Jiaji and Zhao, Kexin and Catanzaro, Bryan},
  title   = {Diff{W}ave: A versatile diffusion model for audio synthesis},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@article{chen2021wavegrad,
  author  = {Chen, Nanxin and Zhang, Yu and Zen, Heiga and Weiss, Ron J and Norouzi, Mohammad and Dehak, Najim and Chan, William},
  title   = {Wave{G}rad 2: Iterative refinement for text-to-speech synthesis},
  journal = {INTERSPEECH},
  pages   = {3765--3769},
  year    = {2021},
}
@article{kingma2021variational,
  author  = {Kingma, Diederik P and Salimans, Tim and Poole, Ben and Ho, Jonathan},
  journal = {Neural Information Processing Systems},
  pages   = {21696--21707},
  title   = {Variational diffusion models},
  volume  = 34,
  year    = 2021
}
@article{song2020score,
  author  = {Song, Yang and Sohl-Dickstein, Jascha and Kingma, Diederik P and Kumar, Abhishek and Ermon, Stefano and Poole, Ben},
  title   = {Score-based generative modeling through stochastic differential equations},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@inproceedings{nichol2021improved,
  author    = {Nichol, Alexander Quinn and Dhariwal, Prafulla},
  title     = {Improved denoising diffusion probabilistic models},
  booktitle = {International Conference on Machine Learning},
  pages     = {8162--8171},
  year      = {2021},
}
@article{vahdat2021score,
  author  = {Vahdat, Arash and Kreis, Karsten and Kautz, Jan},
  title   = {Score-based generative modeling in latent space},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {11287--11302},
  year    = {2021},
}
@article{song2020denoising,
  author  = {Song, Jiaming and Meng, Chenlin and Ermon, Stefano},
  title   = {Denoising diffusion implicit models},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@article{gao2020learning,
  author  = {Gao, Ruiqi and Song, Yang and Poole, Ben and Wu, Ying Nian and Kingma, Diederik P},
  title   = {Learning energy-based models by diffusion recovery likelihood},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@inproceedings{choi2021ilvr,
  author    = {Jooyoung Choi and Sungwon Kim and Yonghyun Jeong and Youngjune Gwon and Sungroh Yoon},
  title     = {{ILVR:} {C}onditioning Method for Denoising Diffusion Probabilistic Models},
  booktitle = {IEEE/CVF International Conference on Computer Vision},
  pages     = {14347--14356},
  year      = {2021},
}
@article{zhang2021diffusion,
  author  = {Zhang, Qinsheng and Chen, Yongxin},
  title   = {Diffusion normalizing flow},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {16280--16291},
  year    = {2021},
}
@article{ye2021mastering,
  author  = {Ye, Weirui and Liu, Shaohuai and Kurutach, Thanard and Abbeel, Pieter and Gao, Yang},
  title   = {Mastering {A}tari games with limited data},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {25476--25488},
  year    = {2021},
}
@article{mazyavkina2021reinforcement,
  author    = {Mazyavkina, Nina and Sviridov, Sergey and Ivanov, Sergei and Burnaev, Evgeny},
  title     = {Reinforcement learning for combinatorial optimization: A survey},
  journal   = {Computers \& Operations Research},
  volume    = {134},
  pages     = {105400},
  publisher = {Elsevier},
  year      = {2021},
}
@article{chen2021decision,
  author  = {Chen, Lili and Lu, Kevin and Rajeswaran, Aravind and Lee, Kimin and Grover, Aditya and Laskin, Misha and Abbeel, Pieter and Srinivas, Aravind and Mordatch, Igor},
  title   = {Decision transformer: Reinforcement learning via sequence modeling},
  journal = {Neural Information Processing Systems},
  volume  = {34},
  pages   = {15084--15097},
  year    = {2021},
}
@article{pateria2021hierarchical,
  author    = {Pateria, Shubham and Subagdja, Budhitama and Tan, Ah-hwee and Quek, Chai},
  title     = {Hierarchical reinforcement learning: A comprehensive survey},
  journal   = {ACM Computing Surveys},
  volume    = {54},
  number    = {5},
  pages     = {1--35},
  publisher = {ACM New York, NY, USA},
  year      = {2021},
}
@incollection{zhang2021multi,
  author    = {Zhang, Kaiqing and Yang, Zhuoran and Ba{\c{s}}ar, Tamer},
  booktitle = {Handbook of Reinforcement Learning and Control},
  pages     = {321--384},
  publisher = {Springer},
  title     = {Multi-agent reinforcement learning: A selective overview of theories and algorithms},
  year      = 2021
}
@inproceedings{chen2021cross,
  author    = {Chen, Defang and Mei, Jian-Ping and Zhang, Yuan and Wang, Can and Wang, Zhe and Feng, Yan and Chen, Chun},
  title     = {Cross-layer distillation with semantic calibration},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {7028--7036},
  year      = {2021},
}
@article{goyal2021non,
  author  = {Goyal, Ankit and Bochkovskiy, Alexey and Deng, Jia and Koltun, Vladlen},
  title   = {Non-deep networks},
  journal = {arXiv:2110.07641},
  year    = {2021},
}
@article{frankle2020training,
  author  = {Frankle, Jonathan and Schwab, David J and Morcos, Ari S},
  title   = {Training {B}atch{N}orm and only {B}atch{N}orm: On the expressive power of random features in {CNN}s},
  journal = {International Conference on Learning Representations},
  year    = {2021},
}
@article{Fazelpour-Danks-2021,
  author  = {Sina Fazelpour and David Danks},
  title   = {Algorithmic Bias: Senses, Sources, Solutions},
  journal = {Philosophy Compass},
  volume  = {16},
  year    = {2021},
}
@article{Mitchell-et-al-2021,
  author  = {Shira Mitchell and Eric Potash and Solon Barocas and Alexander D'Amour and Kristian Lum},
  title   = {Algorithmic Fairness: Choices, Assumptions, and Definitions},
  journal = {Annual Review of Statistics and Its Application},
  volume  = {8},
  pages   = {141--163},
  year    = {2021},
}
@inproceedings{Tomasev-et-al-2021,
  author    = {Nenad Toma{\v{s}}ev and Kevin R. McKee and Jackie Kay and Shakir Mohamed},
  booktitle = {AAAI/ACM Conference on AI, Ethics, and Society},
  pages     = {254--265},
  title     = {Fairness for Unobserved Characteristics: Insights from Technological Impacts on Queer Communities},
  year      = 2021
}
@article{Erasmus-et-al-2021,
  author  = {Adrian Erasmus and Tyler D. P. Brunet and Eyal Fisher},
  title   = {What is Interpretability?},
  journal = {Philosophy \& Technology},
  volume  = {34},
  pages   = {833--862},
  year    = {2021},
}
@inproceedings{Stark-Hoey-2021,
  author    = {Luke Stark and Jesse Hoey},
  title     = {The Ethics of Emotions in Artificial Intelligence Systems},
  booktitle = {ACM Conference on Fairness, Accountability, and Transparency},
  pages     = {782--793},
  year      = {2021},
}
@article{Bontridder-Poullet-2021,
  author  = {No{\'e}mi Bontridder and Yves Poullet},
  title   = {The Role of Artificial Intelligence in Disinformation},
  journal = {Data \& Policy},
  volume  = {3},
  pages   = {E32},
  year    = {2021},
}
@article{LaCroix-et-al-2021,
  author  = {Travis LaCroix and Anders Geil and Cailin O'Connor},
  title   = {The Dynamics of Retraction in Epistemic Networks},
  journal = {Philosophy of Science},
  volume  = {88},
  number  = {3},
  pages   = {415--438},
  year    = {2021},
}
@inproceedings{Abdalla-Abdalla-2021,
  author    = {Mohamed Abdalla and Moustafa Abdalla},
  title     = {The Grey Hoodie Project: Big Tobacco, Big Tech, and the threat on academic integrity},
  booktitle = {AAAI/ACM Conference on AI, Ethics, and Society},
  pages     = {287--297},
  year      = {2021},
}
@inproceedings{saharia2022palette,
  author    = {Saharia, Chitwan and Chan, William and Chang, Huiwen and Lee, Chris and Ho, Jonathan and Salimans, Tim and Fleet, David and Norouzi, Mohammad},
  title     = {Palette: Image-to-image diffusion models},
  booktitle = {ACM SIGGRAPH},
  year      = {2022},
}
@inproceedings{sauer2022stylegan,
  author    = {Sauer, Axel and Schwarz, Katja and Geiger, Andreas},
  title     = {{StyleGAN-XL}: Scaling {StyleGAN} to large diverse datasets},
  booktitle = {ACM SIGGRAPH},
  year      = {2022},
}
@article{ramesh2022hierarchical,
  author  = {Ramesh, Aditya and Dhariwal, Prafulla and Nichol, Alex and Chu, Casey and Chen, Mark},
  title   = {Hierarchical text-conditional image generation with {CLIP} latents},
  journal = {arXiv:2204.06125},
  year    = {2022},
}
@misc{grennan2022why,
  author       = {Grennan, Liz and Kremer, Andreas and Singla, Alex and Zipparo, Peter},
  title        = {{\em {W}hy businesses need explainable {AI}—and how to deliver it}},
  howpublished = {McKinsey, September 29, 2022. \url{https://www.mckinsey.com/capabilities/quantumblack/our-insights/why-businesses-need-explainable-ai-and-how-to-deliver-it/}},
  year         = {2022},
}
@misc{heikkilla2022business,
  author       = {Heikkil{\"a}, Melissa},
  howpublished = {MIT Technology Review, July 7 2022. \url{https://www.technologyreview.com/2022/07/07/1055526/why-business-is-booming-for-military-ai-startups/}},
  title        = {{\em {W}hy business is booming for military {AI} startups}},
  year         = 2022
}
@book{murphy2022probabilistic,
  author    = {Murphy, Kevin P},
  title     = {Probabilistic machine learning: An introduction},
  publisher = {MIT Press},
  year      = {2022},
}
@book{szeliski2022computer,
  author    = {Szeliski, Richard},
  title     = {Computer vision: Algorithms and applications, 2nd {E}dition},
  publisher = {Springer},
  year      = {2022},
}
@article{schmidhuber2022annotated,
  author  = {Schmidhuber, Juergen},
  title   = {Annotated History of Modern {AI} and Deep Learning},
  journal = {arXiv:2212.11279},
  year    = {2022},
}
@article{vardi2022width,
  author  = {Vardi, Gal and Yehudai, Gilad and Shamir, Ohad},
  journal = {PMLR Conference on Learning Theory},
  pages   = {1--33},
  title   = {Width is Less Important than Depth in {R}e{LU} Neural Networks},
  year    = 2022
}
@inproceedings{pezeshki2022multi,
  author    = {Pezeshki, Mohammad and Mitra, Amartya and Bengio, Yoshua and Lajoie, Guillaume},
  title     = {Multi-scale feature learning dynamics: Insights for double descent},
  booktitle = {International Conference on Machine Learning},
  pages     = {17669--17690},
  year      = {2022},
}
@article{ganaie2021ensemble,
  author  = {Ganaie, M. A. and Hu, Minghui and Malik, A. K. and Tanveer, M. and Suganthan, P. N.},
  journal = {Engineering Applications of Artificial Intelligence},
  title   = {Ensemble Deep Learning: A Review},
  volume  = 115,
  year    = 2022
}
@article{bayer2021survey,
  author  = {Bayer, Markus and Kaufhold, Marc-Andr{\'e} and Reuter, Christian},
  journal = {ACM Computing Surveys},
  number  = 7,
  pages   = {1--39},
  title   = {A Survey on Data Augmentation for Text Classification},
  volume  = 55,
  year    = 2022
}
@article{riad2022learning,
  author  = {Riad, Rachid and Teboul, Olivier and Grangier, David and Zeghidour, Neil},
  title   = {Learning strides in convolutional neural networks},
  journal = {International Conference on Learning Representations},
  year    = {2022},
}
@article{wang2022yolov7,
  author  = {Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark},
  journal = {arXiv:2207.02696},
  title   = {{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors},
  year    = 2022
}
@article{ulku2022survey,
  author  = {Ulku, Irem and Akag{\"u}nd{\"u}z, Erdem},
  title   = {A survey on deep learning-based architectures for semantic segmentation on {2D} images},
  journal = {Applied Artificial Intelligence},
  volume  = {36},
  number  = {1},
  year    = {2022},
}
@techreport{phuong2022formal,
  author      = {Phuong, Mary and Hutter, Marcus},
  doi         = {10.48550/ARXIV.2207.09238},
  institution = {DeepMind},
  title       = {Formal Algorithms for Transformers},
  year        = 2022
}
@article{lin2021survey,
  author  = {Tianyang Lin and Yuxin Wang and Xiangyang Liu and Xipeng Qiu},
  title   = {A survey of transformers},
  journal = {AI Open},
  volume  = {3},
  pages   = {111--132},
  year    = {2022},
}
@article{bigBench2022,
  author  = {Srivastava, Aarohi and Rastogi, Abhinav and Rao, Abhishek and Shoeb, Abu Awal Md and Abid, Abubakar and Fisch, Adam and Brown, Adam R and Santoro, Adam and Gupta, Aditya and Garriga-Alonso, Adri{\`a} and others},
  title   = {Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models},
  journal = {arXiv:2206.04615},
  year    = {2022},
}
@inproceedings{du2022glam,
  author    = {Du, Nan and Huang, Yanping and Dai, Andrew M and Tong, Simon and Lepikhin, Dmitry and Xu, Yuanzhong and Krikun, Maxim and Zhou, Yanqi and Yu, Adams Wei and Firat, Orhan and others},
  title     = {G{L}a{M}: Efficient scaling of language models with mixture-of-experts},
  booktitle = {International Conference on Machine Learning},
  pages     = {5547--5569},
  year      = {2022},
}
@article{smith2022using,
  author  = {Smith, Shaden and Patwary, Mostofa and Norick, Brandon and LeGresley, Patrick and Rajbhandari, Samyam and Casper, Jared and Liu, Zhun and Prabhumoye, Shrimai and Zerveas, George and Korthikanti, Vijay and others},
  title   = {Using {D}eep{S}peed and {M}egatron to train {M}egatron-{T}uring {NLG} 530{B}, a large-scale generative language model},
  journal = {arXiv:2201.11990},
  year    = {2022},
}
@article{thoppilan2022lamda,
  author  = {Thoppilan, Romal and De Freitas, Daniel and Hall, Jamie and Shazeer, Noam and Kulshreshtha, Apoorv and Cheng, Heng-Tze and Jin, Alicia and Bos, Taylor and Baker, Leslie and Du, Yu and others},
  title   = {La{MDA}: Language models for dialog applications},
  journal = {arXiv:2201.08239},
  year    = {2022},
}
@article{chowdhery2022palm,
  author  = {Chowdhery, Aakanksha and Narang, Sharan and Devlin, Jacob and Bosma, Maarten and Mishra, Gaurav and Roberts, Adam and Barham, Paul and Chung, Hyung Won and Sutton, Charles and Gehrmann, Sebastian and others},
  title   = {Pa{LM}: Scaling language modeling with pathways},
  journal = {arXiv:2204.02311},
  year    = {2022},
}
@misc{clark2022engineer,
  author       = {Clark, Mitchell},
  title        = {The engineer who claimed a {G}oogle {AI} is sentient has been fired},
  howpublished = {The Verge, July 22, 2022. \url{https://www.theverge.com/2022/7/22/23274958/google-ai-engineer-blake-lemoine-chatbot-lamda-2-sentience}},
  year         = {2022},
}
@inproceedings{gu2022multi,
  author    = {Gu, Jiaqi and Kwon, Hyoukjun and Wang, Dilin and Ye, Wei and Li, Meng and Chen, Yu-Hsin and Lai, Liangzhen and Chandra, Vikas and Pan, David Z},
  title     = {Multi-scale high-resolution vision transformer for semantic segmentation},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {12094--12103},
  year      = {2022},
}
@inproceedings{li2022mat,
  author    = {Li, Wenbo and Lin, Zhe and Zhou, Kun and Qi, Lu and Wang, Yi and Jia, Jiaya},
  title     = {{MAT}: Mask-Aware Transformer for Large Hole Image Inpainting},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {10758--10768},
  year      = {2022},
}
@article{khan2021transformers,
  author  = {Khan, Salman and Naseer, Muzammal and Hayat, Munawar and Zamir, Syed Waqas and Khan, Fahad Shahbaz and Shah, Mubarak},
  title   = {Transformers in vision: A survey},
  journal = {ACM Computing Surveys},
  volume  = {54},
  number  = {10},
  pages   = {200:1--200:41},
  year    = {2022},
}
@inproceedings{liu2022swin,
  author    = {Liu, Ze and Hu, Han and Lin, Yutong and Yao, Zhuliang and Xie, Zhenda and Wei, Yixuan and Ning, Jia and Cao, Yue and Zhang, Zheng and Dong, Li and Wei, Furu and Guo, Baining},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  month     = jun,
  pages     = {12009--12019},
  title     = {Swin Transformer {V}2: Scaling Up Capacity and Resolution},
  year      = 2022
}
@article{wang2021crossformer,
  author  = {Wang, Wenxiao and Yao, Lu and Chen, Long and Lin, Binbin and Cai, Deng and He, Xiaofei and Liu, Wei},
  title   = {Crossformer: A versatile vision transformer hinging on cross-scale attention},
  journal = {International Conference on Learning Representations},
  year    = {2022},
}
@inproceedings{ding2022davit,
  author    = {Ding, Mingyu and Xiao, Bin and Codella, Noel and Luo, Ping and Wang, Jingdong and Yuan, Lu},
  booktitle = {European Conference on Computer Vision},
  pages     = {74--92},
  title     = {Da{V}i{T}: Dual Attention Vision Transformers},
  year      = 2022
}
@inproceedings{dong2022cswin,
  author    = {Dong, Xiaoyi and Bao, Jianmin and Chen, Dongdong and Zhang, Weiming and Yu, Nenghai and Yuan, Lu and Chen, Dong and Guo, Baining},
  title     = {C{SW}in transformer: A general vision transformer backbone with cross-shaped windows},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {12124--12134},
  year      = {2022},
}
@article{selva2022video,
  author  = {Selva, Javier and Johansen, Anders S and Escalera, Sergio and Nasrollahi, Kamal and Moeslund, Thomas B and Clap{\'e}s, Albert},
  title   = {Video transformers: A survey},
  journal = {arXiv:2201.05991},
  year    = {2022},
}
@article{hu2021graph,
  author    = {Hu, Wei and Pang, Jiahao and Liu, Xianming and Tian, Dong and Lin, Chia-Wen and Vetro, Anthony},
  journal   = {IEEE Transactions on Multimedia},
  pages     = {3961--3977},
  publisher = {IEEE},
  title     = {Graph signal processing for geometric data and beyond: Theory and applications},
  volume    = 24,
  year      = 2022
}
@article{xiao2022graph,
  author  = {Xiao, Shunxin and Wang, Shiping and Dai, Yuanfei and Guo, Wenzhong},
  title   = {Graph neural networks in node classification: Survey and evaluation},
  journal = {Machine Vision and Applications},
  volume  = {33},
  number  = {1},
  pages   = {1--19},
  year    = {2022},
}
@article{grattarola2022understanding,
  author    = {Grattarola, Daniele and Zambon, Daniele and Bianchi, Filippo Maria and Alippi, Cesare},
  title     = {Understanding pooling in graph neural networks},
  journal   = {IEEE Transactions on Neural Networks and Learning Systems},
  publisher = {IEEE},
  year      = {2022},
}
@article{bond2021deep,
  author  = {Sam Bond{-}Taylor and Adam Leach and Yang Long and Chris G. Willcocks},
  title   = {Deep Generative Modelling: {A} Comparative Review of {VAE}s, {GAN}s, Normalizing Flows, Energy-Based and Autoregressive Models},
  journal = {IEEE Transactions on Pattern Analysis \& Machine Intelligence},
  volume  = {44},
  number  = {11},
  pages   = {7327--7347},
  year    = {2022},
}
@article{borji2019pros,
  author  = {Ali Borji},
  title   = {Pros and cons of {GAN} evaluation measures: New developments},
  journal = {Computer Vision \& Image Understanding},
  volume  = {215},
  pages   = {103329},
  year    = {2022},
}
@misc{hindupur2022zoo,
  author       = {Hindupur, Avinash},
  title        = {The {GAN} Zoo},
  howpublished = {GitHub {R}etrieved {J}anuary 17, 2023. \url{https://github.com/hindupuravinash/the-gan-zoo}},
  year         = {2022},
}
@article{roich2021pivotal,
  author    = {Roich, Daniel and Mokady, Ron and Bermano, Amit H and Cohen-Or, Daniel},
  title     = {Pivotal tuning for latent-based editing of real images},
  journal   = {ACM Transactions on Graphics (TOG)},
  volume    = {42},
  number    = {1},
  pages     = {1--13},
  publisher = {ACM New York, NY},
  year      = {2022},
}
@article{xia2022gan,
  author    = {Xia, Weihao and Zhang, Yulun and Yang, Yujiu and Xue, Jing-Hao and Zhou, Bolei and Yang, Ming-Hsuan},
  title     = {{GAN} inversion: A survey},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  pages     = {1--17},
  publisher = {IEEE},
  year      = {2022},
}
@inproceedings{mangalam2022reversible,
  author    = {Mangalam, Karttikeya and Fan, Haoqi and Li, Yanghao and Wu, Chao-Yuan and Xiong, Bo and Feichtenhofer, Christoph and Malik, Jitendra},
  title     = {Reversible Vision Transformers},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {10830--10840},
  year      = {2022},
}
@article{saharia2022photorealistic,
  author  = {Saharia, Chitwan and Chan, William and Saxena, Saurabh and Li, Lala and Whang, Jay and Denton, Emily and Ghasemipour, Seyed Kamyar Seyed and Ayan, Burcu Karagol and Mahdavi, S Sara and Lopes, Rapha Gontijo and others},
  title   = {Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding},
  journal = {arXiv:2205.11487},
  year    = {2022},
}
@article{karras2022elucidating,
  author  = {Karras, Tero and Aittala, Miika and Aila, Timo and Laine, Samuli},
  title   = {Elucidating the Design Space of Diffusion-Based Generative Models},
  journal = {Neural Information Processing Systems},
  year    = {2022},
}
@article{croitoru2022diffusion,
  author  = {Croitoru, Florinel-Alin and Hondru, Vlad and Ionescu, Radu Tudor and Shah, Mubarak},
  title   = {Diffusion Models in Vision: A Survey},
  journal = {arXiv:2209.04747},
  year    = {2022},
}
@article{cao2022survey,
  author  = {Cao, Hanqun and Tan, Cheng and Gao, Zhangyang and Chen, Guangyong and Heng, Pheng-Ann and Li, Stan Z},
  title   = {A Survey on Generative Diffusion Model},
  journal = {arXiv:2209.02646},
  year    = {2022},
}
@article{luo2022understanding,
  author  = {Luo, Calvin},
  title   = {Understanding Diffusion Models: A Unified Perspective},
  journal = {arXiv:2208.11970},
  year    = {2022},
}
@article{yang2022diffusion,
  author  = {Yang, Ruihan and Srivastava, Prakhar and Mandt, Stephan},
  title   = {Diffusion probabilistic modeling for video generation},
  journal = {arXiv:2203.09481},
  year    = {2022},
}
@inproceedings{nichol2021glide,
  author    = {Alexander Quinn Nichol and Prafulla Dhariwal and Aditya Ramesh and Pranav Shyam and Pamela Mishkin and Bob McGrew and Ilya Sutskever and Mark Chen},
  title     = {{GLIDE:} Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models},
  booktitle = {International Conference on Machine Learning},
  pages     = {16784--16804},
  year      = {2022},
}
@article{saharia2022image,
  author    = {Saharia, Chitwan and Ho, Jonathan and Chan, William and Salimans, Tim and Fleet, David J and Norouzi, Mohammad},
  title     = {Image super-resolution via iterative refinement},
  journal   = {IEEE Transactions on Pattern Analysis \& Machine Intelligence},
  pages     = {1--14},
  publisher = {IEEE},
  year      = {2022},
}
@article{hertz2022prompt,
  author  = {Hertz, Amir and Mokady, Ron and Tenenbaum, Jay and Aberman, Kfir and Pritch, Yael and Cohen-Or, Daniel},
  title   = {Prompt-to-prompt image editing with cross attention control},
  journal = {arXiv:2208.01626},
  year    = {2022},
}
@inproceedings{nie2022diffusion,
  author    = {Weili Nie and Brandon Guo and Yujia Huang and Chaowei Xiao and Arash Vahdat and Animashree Anandkumar},
  title     = {Diffusion Models for Adversarial Purification},
  booktitle = {International Conference on Machine Learning},
  pages     = {16805--16827},
  year      = {2022},
}
@article{baranchuk2021label,
  author  = {Baranchuk, Dmitry and Rubachev, Ivan and Voynov, Andrey and Khrulkov, Valentin and Babenko, Artem},
  title   = {Label-efficient semantic segmentation with diffusion models},
  journal = {International Conference on Learning Representations},
  year    = {2022},
}
@article{chung2022score,
  author    = {Chung, Hyungjin and Ye, Jong Chul},
  title     = {Score-based diffusion models for accelerated {MRI}},
  journal   = {Medical Image Analysis},
  volume    = {80},
  pages     = {102479},
  publisher = {Elsevier},
  year      = {2022},
}
@inproceedings{chung2022come,
  author    = {Chung, Hyungjin and Sim, Byeongsu and Ye, Jong Chul},
  title     = {Come-closer-diffuse-faster: Accelerating conditional diffusion models for inverse problems through stochastic contraction},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {12413--12422},
  year      = {2022},
}
@inproceedings{peng2022towards,
  author    = {Peng, Cheng and Guo, Pengfei and Zhou, S Kevin and Patel, Vishal and Chellappa, Rama},
  booktitle = {Medical Image Computing and Computer Assisted Intervention},
  pages     = {623--633},
  title     = {Towards performant and reliable undersampled {MR} reconstruction via diffusion model sampling},
  volume    = 13436,
  year      = 2022
}
@inproceedings{xie2022measurement,
  author    = {Yutong Xie and Quanzheng Li},
  title     = {Measurement-Conditioned Denoising Diffusion Probabilistic Model for Under-Sampled Medical Image Reconstruction},
  booktitle = {Medical Image Computing and Computer Assisted Intervention},
  volume    = {13436},
  pages     = {655--664},
  year      = {2022},
}
@article{luo2022mri,
  author  = {Luo, Guanxiong and Heide, Martin and Uecker, Martin},
  title   = {{MRI} Reconstruction via Data Driven {M}arkov Chain with Joint Uncertainty Estimation},
  journal = {arXiv:2202.01479},
  year    = {2022},
}
@article{ho2022video,
  author  = {Ho, Jonathan and Salimans, Tim and Gritsenko, Alexey and Chan, William and Norouzi, Mohammad and Fleet, David J},
  title   = {Video diffusion models},
  journal = {International Conference on Learning Representations},
  year    = {2022},
}
@article{harvey2022flexible,
  author  = {Harvey, William and Naderiparizi, Saeid and Masrani, Vaden and Weilbach, Christian and Wood, Frank},
  title   = {Flexible Diffusion Modeling of Long Videos},
  journal = {Neural Information Processing Systems},
  volume  = {35},
  year    = {2022},
}
@article{hoppe2022diffusion,
  author  = {H{\"o}ppe, Tobias and Mehrjou, Arash and Bauer, Stefan and Nielsen, Didrik and Dittadi, Andrea},
  title   = {Diffusion Models for Video Prediction and Infilling},
  journal = {ECCV Workshop on AI for Creative Video Editing and Understanding},
  year    = {2022},
}
@article{voleti2022masked,
  author  = {Voleti, Vikram and Jolicoeur-Martineau, Alexia and Pal, Christopher},
  title   = {{MCVD}: Masked Conditional Video Diffusion for Prediction, Generation, and Interpolation},
  journal = {Neural Information Processing Systems},
  volume  = {35},
  year    = {2022},
}
@article{rissanen2022generative,
  author  = {Rissanen, Severi and Heinonen, Markus and Solin, Arno},
  title   = {Generative modelling with inverse heat dissipation},
  journal = {arXiv:2206.13397},
  year    = {2022},
}
@article{bansal2022cold,
  author  = {Bansal, Arpit and Borgnia, Eitan and Chu, Hong-Min and Li, Jie S and Kazemi, Hamid and Huang, Furong and Goldblum, Micah and Geiping, Jonas and Goldstein, Tom},
  title   = {Cold diffusion: Inverting arbitrary image transforms without noise},
  journal = {arXiv:2208.09392},
  year    = {2022},
}
@article{lipman2022flow,
  author  = {Lipman, Yaron and Chen, Ricky TQ and Ben-Hamu, Heli and Nickel, Maximilian and Le, Matt},
  title   = {Flow matching for generative modeling},
  journal = {arXiv:2210.02747},
  year    = {2022},
}
@inproceedings{choi2022perception,
  author    = {Choi, Jooyoung and Lee, Jungbeom and Shin, Chaehun and Kim, Sungwon and Kim, Hyunwoo and Yoon, Sungroh},
  title     = {Perception Prioritized Training of Diffusion Models},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {11472--11481},
  year      = {2022},
}
@article{bao2022analytic,
  author  = {Bao, Fan and Li, Chongxuan and Zhu, Jun and Zhang, Bo},
  title   = {Analytic-{DPM}: An analytic estimate of the optimal reverse variance in diffusion probabilistic models},
  journal = {International Conference on Learning Representations},
  year    = {2022},
}
@article{ho2022cascaded,
  author  = {Ho, Jonathan and Saharia, Chitwan and Chan, William and Fleet, David J and Norouzi, Mohammad and Salimans, Tim},
  journal = {Journal of Machine Learning Research},
  number  = 47,
  pages   = {1--33},
  title   = {Cascaded Diffusion Models for High Fidelity Image Generation},
  volume  = 23,
  year    = 2022
}
@inproceedings{rombach2022high,
  author    = {Rombach, Robin and Blattmann, Andreas and Lorenz, Dominik and Esser, Patrick and Ommer, Bj{\"o}rn},
  title     = {High-resolution image synthesis with latent diffusion models},
  booktitle = {IEEE/CVF Computer Vision \& Pattern Recognition},
  pages     = {10684--10695},
  year      = {2022},
}
@article{xiao2021tackling,
  author  = {Xiao, Zhisheng and Kreis, Karsten and Vahdat, Arash},
  title   = {Tackling the generative learning trilemma with denoising diffusion {GAN}s},
  journal = {International Conference on Learning Representations},
  year    = {2022},
}
@article{salimans2022progressive,
  author  = {Salimans, Tim and Ho, Jonathan},
  title   = {Progressive distillation for fast sampling of diffusion models},
  journal = {International Conference on Learning Representations},
  year    = {2022},
}
@article{dockhorn2021score,
  author  = {Dockhorn, Tim and Vahdat, Arash and Kreis, Karsten},
  title   = {Score-based generative modeling with critically-damped {L}angevin diffusion},
  journal = {International Conference on Learning Representations},
  year    = {2022},
}
@article{ho2022classifier,
  author  = {Ho, Jonathan and Salimans, Tim},
  title   = {Classifier-free diffusion guidance},
  journal = {NeurIPS Workshop on Deep Generative Models and Downstream Applications},
  year    = {2022},
}
@article{feng2022ernie,
  author  = {Feng, Zhida and Zhang, Zhenyu and Yu, Xintong and Fang, Yewei and Li, Lanxin and Chen, Xuyi and Lu, Yuxiang and Liu, Jiaxiang and Yin, Weichong and Feng, Shikun and others},
  title   = {{ERNIE-ViLG} 2.0: Improving Text-to-Image Diffusion Model with Knowledge-Enhanced Mixture-of-Denoising-Experts},
  journal = {arXiv:2210.15257},
  year    = {2022},
}
@article{wang2022deep,
  author    = {Wang, Xu and Wang, Sen and Liang, Xingxing and Zhao, Dawei and Huang, Jincai and Xu, Xin and Dai, Bin and Miao, Qiguang},
  title     = {Deep reinforcement learning: {A} survey},
  journal   = {IEEE Transactions on Neural Networks and Learning Systems},
  publisher = {IEEE},
  year      = {2022},
}
@article{meta2022human,
  author    = {FAIR},
  title     = {Human-level play in the game of {D}iplomacy by combining language models with strategic reasoning},
  journal   = {Science},
  volume    = {378},
  number    = {6624},
  pages     = {1067--1074},
  publisher = {American Association for the Advancement of Science},
  year      = {2022},
}
@article{fawzi2022discovering,
  author    = {Fawzi, Alhussein and Balog, Matej and Huang, Aja and Hubert, Thomas and Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Ruiz, Francisco J R and Schrittwieser, Julian and Swirszcz, Grzegorz and others},
  journal   = {Nature},
  number    = 7930,
  pages     = {47--53},
  publisher = {Nature Publishing Group},
  title     = {Discovering faster matrix multiplication algorithms with reinforcement learning},
  volume    = 610,
  year      = 2022
}
@inproceedings{mei2022role,
  author    = {Jincheng Mei and Wesley Chung and Valentin Thomas and Bo Dai and Csaba Szepesv{\'{a}}ri and Dale Schuurmans},
  title     = {The Role of Baselines in Policy Gradient Optimization},
  booktitle = {Neural Information Processing Systems},
  volume    = {35},
  pages     = {17818--17830},
  year      = {2022},
}
@inproceedings{zheng2022online,
  author    = {Zheng, Qinqing and Zhang, Amy and Grover, Aditya},
  booktitle = {International Conference on Machine Learning},
  pages     = {27042--27059},
  title     = {Online decision transformer},
  volume    = 162,
  year      = 2022
}
@article{ouyang2022training,
  author  = {Ouyang, Long and Wu, Jeffrey and Jiang, Xu and Almeida, Diogo and Wainwright, Carroll and Mishkin, Pamela and Zhang, Chong and Agarwal, Sandhini and Slama, Katarina and Ray, Alex and others},
  title   = {Training language models to follow instructions with human feedback},
  journal = {Neural Information Processing Systems},
  volume  = {35},
  pages   = {27730--27744},
  year    = {2022},
}
@article{power2022grokking,
  author  = {Power, Alethea and Burda, Yuri and Edwards, Harri and Babuschkin, Igor and Misra, Vedant},
  title   = {Grokking: Generalization beyond overfitting on small algorithmic datasets},
  journal = {arXiv:2201.02177},
  year    = {2022},
}
@article{rajpurkar2022ai,
  author  = {Rajpurkar, Pranav and Chen, Emma and Banerjee, Oishi and Topol, Eric J},
  title   = {{AI} in health and medicine},
  journal = {Nature Medicine},
  volume  = {28},
  number  = {1},
  pages   = {31--38},
  year    = {2022},
}
@article{LaCroix-Mohseni-2022,
  author  = {Travis LaCroix and Aydin Mohseni},
  title   = {The Tragedy of the {AI} Commons},
  journal = {Synthese},
  volume  = {200},
  number  = {289},
  year    = {2022},
}
@article{LaCroix-2022-Linguistic,
  author  = {Travis LaCroix},
  title   = {The Linguistic Blind Spot of Value-Aligned Agency, Natural and Artificial},
  journal = {arXiv:2207.00868},
  year    = {2022},
}
@article{Green-2022,
  author  = {Ben Green},
  title   = {Escaping the Impossibility of Fairness: From Formal to Substantive Algorithmic Fairness},
  journal = {Philosophy \& Technology},
  volume  = {35},
  number  = {90},
  year    = {2022},
}
@article{Falbo-LaCroix-2022,
  author  = {Falbo, Arianna and LaCroix, Travis},
  title   = {Est-ce que vous compute? {Code}-Switching, Cultural Identity, and {AI}},
  journal = {Feminist Philosophy Quarterly},
  volume  = {8},
  number  = {3/4},
  year    = {2022}
}
@inproceedings{Raji-et-al-2022,
  author    = {Raji, Inioluwa Deborah and Kumar, I. Elizabeth and Horowitz, Aaron and Selbst, Andrew},
  title     = {The Fallacy of {AI} Functionality},
  booktitle = {ACM Conference on Fairness, Accountability, and Transparency},
  pages     = {959--972},
  year      = {2022}
}
@article{Micelli-et-al-2022,
  author  = {Milagros Miceli and Julian Posada and Tianling Yang},
  journal = {Proceedings of the ACM on Human-Computer Interaction},
  title   = {Studying Up Machine Learning Data: Why Talk About Bias When We Mean Power?},
  volume  = 6,
  year    = 2022
}
@article{mehrabi2022survey,
  author  = {Mehrabi, Ninareh and Morstatter, Fred and Saxena, Nripsuta and Lerman, Kristina and Galstyan, Aram},
  title   = {A Survey on Bias and Fairness in Machine Learning},
  journal = {ACM Computing Surveys},
  volume  = {54},
  number  = {6},
  pages   = {1--35},
  year    = {2022}
}
@misc{prince2022explain,
  author       = {Prince, Simon Jeremy Damion},
  title        = {Explainability {I}: local post-hoc explanations},
  howpublished = {\url{https://www.borealisai.com/research-blogs/explainability-i-local-post-hoc-explanations/}},
  year         = {2022}
}
@book{molnar2020interpretable,
  author    = {Molnar, Christoph},
  title     = {Interpretable Machine Learning: A Guide for Making Black Box Models Explainable},
  publisher = {\url{https://christophm.github.io/interpretable-ml-book}},
  year      = {2022}
}
@article{Smith-Miller-2022,
  author  = {Smith, Marcus and Miller, Seumas},
  title   = {The Ethical Application of Biometric Facial Recognition Technology},
  journal = {AI \& Society},
  volume  = {37},
  pages   = {167--175},
  year    = {2022}
}
@article{Stark-Hutson-2022,
  author  = {Stark, Luke and Hutson, Jevan},
  title   = {Physiognomic Artificial Intelligence},
  journal = {Fordham Intellectual Property, Media \& Entertainment Law Journal},
  volume  = {XXXII},
  number  = {4},
  pages   = {922--978},
  year    = {2022}
}
@incollection{Moore-Himma-2022,
  author    = {Moore, Adam and Himma, Ken},
  booktitle = {The {Stanford} Encyclopedia of Philosophy},
  title     = {Intellectual Property},
  year      = 2022
}
@article{Carlini-et-al-2022,
  author  = {Nicholas Carlini and Daphne Ippolito and Matthew Jagielski and Katherine Lee and Florian Tram{\`e}r and Chiyuan Zhang},
  journal = {arXiv:2202.07646},
  title   = {Quantifying memorization across neural language models},
  year    = 2022
}
@inproceedings{Weidinger-et-al-2022,
  author    = {Laura Weidinger and Jonathan Uesato and Maribeth Rauh and Conor Griffin and Po-Sen Huang and John Mellor and Amelia Glaese and Myra Cheng and Borja Balle and Atoosa Kasirzadeh and Courtney Biles and Sasha Brown and Zac Kenton and Will Hawkins and Tom Stepleton and Abeba Birhane and Lisa Anne Hendricks and Laura Rimell and William Isaac and Julia Haas and Sean Legassick and Geoffrey Irving and Iason Gabriel},
  booktitle = {ACM Conference on Fairness, Accountability, and Transparency},
  pages     = {214--229},
  title     = {Taxonomy of risks posed by language models},
  year      = 2022
}
@article{Luccioni-et-al-2022,
  author  = {Alexandra Sasha Luccioni and Sylvain Viguier and Anne-Laure Ligozat},
  journal = {arXiv:2211.02001},
  title   = {Estimating the Carbon Footprint of {BLOOM}, a {176B} Parameter Language Model},
  year    = 2022
}
@inproceedings{Birhane-et-al-2022,
  author    = {Birhane, Abeba and Kalluri, Pratyusha and Card, Dallas and Agnew, William and Dotan, Ravit and Bao, Michelle},
  title     = {The Values Encoded in Machine Learning Research},
  booktitle = {ACM Conference on Fairness, Accountability, and Transparency},
  pages     = {173--184},
  year      = {2022}
}
% NOTE(review): the journal field was missing (required for article entries). Volume 198
% appeared to belong to a different paper by the same author and was dropped.
% TODO confirm venue, volume and pages against the publisher record.
@article{Johnson-2022,
  author  = {Gabrielle M. Johnson},
  journal = {Journal of Moral Philosophy},
  title   = {Are Algorithms Value-Free? Feminist Theoretical Virtues in Machine Learning},
  year    = 2022
}
@inproceedings{Birhane-et-al-2022-Participatory-AI,
  author    = {Birhane, Abeba and Isaac, William and Prabhakaran, Vinodkumar and Diaz, Mark and Elish, Madeleine Clare and Gabriel, Iason and Mohamed, Shakir},
  title     = {Power to the People? {Opportunities} and Challenges for Participatory {AI}},
  booktitle = {Equity and Access in Algorithms, Mechanisms, and Optimization},
  year      = {2022}
}
@article{Himmelreich-2022,
  author  = {Himmelreich, Johannes},
  title   = {Against `Democratizing {AI}'},
  journal = {AI \& Society},
  year    = {2022}
}
@book{murphy2022advanced,
  author    = {Murphy, Kevin P},
  title     = {Probabilistic machine learning: Advanced topics},
  publisher = {MIT Press},
  year      = {2023}
}
@book{zhang2021dive,
  author    = {Zhang, Aston and Lipton, Zachary C. and Li, Mu and Smola, Alexander J.},
  title     = {Dive into deep learning},
  publisher = {Cambridge University Press},
  year      = {2023}
}
@article{liu2021self,
  author  = {Liu, Xiao and Zhang, Fanjin and Hou, Zhenyu and Mian, Li and Wang, Zhaoyu and Zhang, Jing and Tang, Jie},
  title   = {Self-supervised learning: Generative or contrastive},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  volume  = {35},
  number  = {1},
  pages   = {857--876},
  year    = {2023}
}
@article{zou2023object,
  author  = {Zou, Zhengxia and Chen, Keyan and Shi, Zhenwei and Guo, Yuhong and Ye, Jieping},
  title   = {Object detection in 20 years: A survey},
  journal = {Proceedings of the IEEE},
  year    = {2023}
}
@article{hoffmann2022training,
  author  = {Hoffmann, Jordan and Borgeaud, Sebastian and Mensch, Arthur and Buchatskaya, Elena and Cai, Trevor and Rutherford, Eliza and de Las Casas, Diego and Hendricks, Lisa Anne and Welbl, Johannes and Clark, Aidan and others},
  journal = {arXiv:2203.15556},
  title   = {Training Compute-Optimal Large Language Models},
  year    = 2023
}
@article{tay2020efficient,
  author    = {Tay, Yi and Dehghani, Mostafa and Bahri, Dara and Metzler, Donald},
  title     = {Efficient transformers: A survey},
  journal   = {ACM Computing Surveys},
  volume    = {55},
  number    = {6},
  pages     = {109:1--109:28},
  publisher = {ACM New York, NY},
  year      = {2023}
}
@article{liu2021survey,
  author  = {Liu, Yang and Zhang, Yao and Wang, Yixin and Hou, Feng and Yuan, Jin and Tian, Jiang and Zhang, Yang and Shi, Zhongchao and Fan, Jianping and He, Zhiqiang},
  title   = {A survey of visual transformers},
  journal = {IEEE Transactions on Neural Networks and Learning Systems},
  year    = {2023}
}
@article{velivckovic2023everything,
  author    = {Veli{\v{c}}kovi{\'c}, Petar},
  title     = {Everything is connected: Graph neural networks},
  journal   = {Current Opinion in Structural Biology},
  volume    = {79},
  pages     = {102538},
  publisher = {Elsevier},
  year      = {2023}
}
@article{wu2020graph,
  author    = {Wu, Shiwen and Sun, Fei and Zhang, Wentao and Xie, Xu and Cui, Bin},
  title     = {Graph neural networks in recommender systems: A survey},
  journal   = {ACM Computing Surveys},
  volume    = {55},
  number    = {5},
  pages     = {97:1--97:37},
  publisher = {ACM New York, NY},
  year      = {2023}
}
@article{poole2022dreamfusion,
  author  = {Poole, Ben and Jain, Ajay and Barron, Jonathan T and Mildenhall, Ben},
  title   = {{DreamFusion}: Text-to-{3D} using {2D} diffusion},
  journal = {International Conference on Learning Representations},
  year    = {2023}
}
@article{zhang2023adding,
  author  = {Zhang, Lvmin and Agrawala, Maneesh},
  title   = {Adding conditional control to text-to-image diffusion models},
  journal = {arXiv:2302.05543},
  year    = {2023}
}
@article{moerland2020model,
  author  = {Moerland, Thomas M and Broekens, Joost and Plaat, Aske and Jonker, Catholijn M and others},
  title   = {Model-based reinforcement learning: A survey},
  journal = {Foundations and Trends in Machine Learning},
  volume  = {16},
  number  = {1},
  pages   = {1--118},
  year    = {2023}
}
@article{liu2022omnigrok,
  author  = {Liu, Ziming and Michaud, Eric J and Tegmark, Max},
  title   = {Omnigrok: Grokking Beyond Algorithmic Data},
  journal = {International Conference on Learning Representations},
  year    = {2023}
}
@article{rolnick2023tackling,
  author  = {Rolnick, David and Donti, Priya L. and Kaack, Lynn H. and Kochanski, Kelly and Lacoste, Alexandre and Sankaran, Kris and Ross, Andrew Slavin and Milojevic{-}Dupont, Nikola and Jaques, Natasha and Waldman{-}Brown, Anna and Luccioni, Alexandra Sasha and Maharaj, Tegan and Sherwin, Evan D. and Mukkavilli, S. Karthik and Kording, Konrad P. and Gomes, Carla P. and Ng, Andrew Y. and Hassabis, Demis and Platt, John C. and Creutzig, Felix and Chayes, Jennifer T. and Bengio, Yoshua},
  title   = {Tackling Climate Change with Machine Learning},
  journal = {ACM Computing Surveys},
  volume  = {55},
  number  = {2},
  pages   = {1--42},
  year    = {2023}
}
@book{LaCroix2023,
  author = {LaCroix, Travis},
  title  = {Artificial Intelligence and the Value-Alignment Problem: A Philosophical Introduction},
  note   = {\url{https://value-alignment.github.io}},
  year   = {2023}
}
@book{Barocas-et-al-2019,
  author    = {Barocas, Solon and Hardt, Moritz and Narayanan, Arvind},
  title     = {Fairness and Machine Learning: Limitations and Opportunities},
  publisher = {MIT Press},
  year      = {2023}
}
@misc{Goodin-2023,
  author       = {Dan Goodin},
  howpublished = {Ars Technica, January 6, 2023. \url{https://arstechnica.com/information-technology/2023/01/chatgpt-is-enabling-script-kiddies-to-write-functional-malware/}},
  title        = {Chat{GPT} is enabling script kiddies to write functional malware},
  year         = 2023
}
@misc{Abrahams-2023,
  author       = {Doriel Abrahams},
  howpublished = {Forter Blog, March 27, 2023. \url{https://www.forter.com/blog/lets-talk-about-generative-ai-and-fraud/}},
  title        = {Let's Talk About Generative {AI} and Fraud},
  year         = 2023
}
@article{Veliz-2023,
  author  = {V{\'e}liz, Carissa},
  title   = {Chatbots shouldn't use emojis},
  journal = {Nature},
  volume  = {615},
  pages   = {375},
  year    = {2023}
}
@article{Henderson-et-al-2023,
  author  = {Henderson, Peter and Li, Xuechen and Jurafsky, Dan and Hashimoto, Tatsunori and Lemley, Mark A. and Liang, Percy},
  title   = {Foundation Models and Fair Use},
  journal = {arXiv:2303.15715},
  year    = {2023}
}
@article{Carlini-et-al-2023,
  author  = {Carlini, Nicholas and Hayes, Jamie and Nasr, Milad and Jagielski, Matthew and Sehwag, Vikash and Tram{\`e}r, Florian and Balle, Borja and Ippolito, Daphne and Wallace, Eric},
  title   = {Extracting training data from diffusion models},
  journal = {arXiv:2301.13188},
  year    = {2023}
}
@misc{Luccioni-2023,
  author       = {Alexandra Sasha Luccioni},
  howpublished = {Ars Technica, April 12, 2023. \url{https://arstechnica.com/gadgets/2023/04/generative-ai-is-cool-but-lets-not-forget-its-human-and-environmental-costs}},
  title        = {The mounting human and environmental costs of generative {AI}},
  year         = 2023
}
@article{Ceylan-et-al-2023,
  author  = {Ceylan, Gizem and Anderson, Ian A. and Wood, Wendy},
  title   = {Sharing of misinformation is habitual, not just lazy or biased},
  journal = {Proceedings of the National Academy of Sciences of the United States of America},
  volume  = {120},
  number  = {4},
  year    = {2023}
}
@book{Broussard-2023,
  author    = {Meredith Broussard},
  publisher = {MIT Press},
  title     = {More than a Glitch: Confronting Race, Gender, and Ability Bias in Tech},
  year      = 2023
}
@misc{Gebru-et-al-2023,
  author = {Timnit Gebru and Emily M. Bender and Angelina McMillan-Major and Margaret Mitchell},
  note   = {\url{https://www.dair-institute.org/blog/letter-statement-March2023}},
  title  = {Statement from the listed authors of {Stochastic Parrots} on the ``{AI} pause'' letter},
  year   = 2023
}
@book{torralba2023computervision,
  author    = {Torralba, Antonio and Freeman, William and Isola, Phillip},
  title     = {Foundations of Computer Vision},
  publisher = {MIT Press},
  year      = {2024}
}