% book-production.tex -- master file for "Reinforcement Learning: Foundations"
% (GitHub page chrome and line-number residue from the scrape removed.)
\documentclass[12pt]{book}
\usepackage{graphicx}
\usepackage[T1]{fontenc}
\usepackage{lmodern} % modern replacement for the obsolete 'ae' package
\usepackage{color}
\usepackage{amsmath}
\usepackage{amssymb}
% \usepackage{hyperref}
\usepackage{fullpage}
\usepackage{natbib}
\usepackage{framed}
\newcommand{\ymignore}[1]{}
\usepackage{hyperref}
\newenvironment{advanced}{\begin{leftbar}}{\end{leftbar}}
\input{defs}
\title{Reinforcement Learning: Foundations}
\date{February 2023
\\
\textcolor{red}{This book is still a work in progress. In particular, references to the literature are not yet complete. We would be grateful for comments, suggestions, and reports of any omissions or errors, sent to \url{[email protected]}.}% TODO(review): contact address appears redacted in this copy -- restore the real e-mail
}
\author{Shie Mannor, Yishay Mansour and Aviv Tamar}
\newcommand{\shie}[1]{\textcolor{blue}{#1}}
\begin{document}
\maketitle
\tableofcontents
\chapter{Introduction and Overview}
\label{chapter:intro}
\input{current_chapters/chapter1-intro}
\chapter{Deterministic Decision Processes}
\label{chapter:DDP}
\input{current_chapters/chapter2-ddp}
\chapter{Markov Chains}
\label{chapter:MC}
\input{current_chapters/chapter3-mc}
\chapter{Markov Decision Processes and Finite Horizon Dynamic Programming}
\label{chapter:MDP-FH}
\input{current_chapters/chapter4-mdp-fh}
\chapter{Discounted Markov Decision Processes}
\label{chapter:disc}
\input{current_chapters/chapter5-disc}
\chapter{Linear Programming Solutions}\label{chapter-LP}
\input{current_chapters/LP}
\chapter{Preface to the Learning Chapters}\label{chapter-learning-preface}
\input{future_chapters/chapter-learning-preface}
\chapter{Reinforcement Learning: Model Based}\label{chapter-model-based}
\input{current_chapters/chapter6-mbrl}
\chapter{Reinforcement Learning: Model Free}
\label{chapter:learning-model-free}
\input{current_chapters/chapter7-mfrl}
\chapter{Large State Spaces: Value Function Approximation}
\label{chapter:function-approximation}
\input{current_chapters/chapter8-fa}
\chapter{Large State Space: Policy Gradient Methods}
\label{chapter:policy-gradient}
\input{current_chapters/chapter9-pg}
\chapter{Multi-Armed Bandits}
\label{chapter:MAB}
\input{current_chapters/chapter10-mab}
% \chapter{POMDP}
% \label{chapter:POMDP}
% \input{ym_chapters/Lecture-POMDP}
% \chapter{LQR}
% \label{chapter:LQR}
% \input{ym_chapters/Lecture-LQR}
% \chapter{Generative Model}
% \label{chapter:generative}
% \input{ym_chapters/Lecture-Generative}
% \chapter{Inverse RL and Apprenticeship learning}
% \label{chapter:IRL}
% \input{ym_chapters/Lecture-IRL}
\appendix
\chapter{Dynamic Programming}
\label{chapter:dp}
\input{current_chapters/appendix-dp}
\bibliographystyle{plainnat}% natbib-aware counterpart of 'plain'; supports \citet/\citep author-year citations
\bibliography{bib-lecture}
\end{document}