-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathbook-sandbox.tex
140 lines (103 loc) · 3.54 KB
/
book-sandbox.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
% Preamble of the book's master file: selects the document class, loads the
% packages, defines two small helpers (\ymignore and the `advanced'
% environment), and finally reads the shared file defs.tex.
\documentclass[12pt]{book}
% graphicx supplies \includegraphics; ae selects the AE virtual fonts.
% NOTE(review): ae is listed as obsolete in l2tabu (the usual replacement is
% lmodern together with T1 fontenc). Swapping it changes the fonts on every
% page, so confirm before touching it.
\usepackage{graphicx,ae}
% color supplies \textcolor, which the title page and \shie rely on.
\usepackage{color}
\usepackage{amsmath}
\usepackage{amssymb}
% hyperref used to be loaded here; it is now loaded further down, after the
% other packages.
% \usepackage{hyperref}
\usepackage{fullpage}
\usepackage{natbib}
% framed provides the leftbar environment referenced by the disabled variant
% of `advanced' below.
\usepackage{framed}
\usepackage{subcaption}
% Algorithm packages are currently disabled.
% \usepackage{algorithm}
% \usepackage{algpseudocode}
% \ymignore{...} takes one argument and discards it, i.e. it hides the text.
\newcommand{\ymignore}[1]{}
% hyperref is the last package loaded in this file.
% NOTE(review): defs.tex is read afterwards; if it loads further packages,
% check that they are compatible with being loaded after hyperref.
\usepackage{hyperref}
% Disabled variant: marks `advanced' material with a vertical bar on the left.
% \newenvironment{advanced}{\begin{leftbar}}{\end{leftbar}}
% Active variant: empty begin/end code, so `advanced' material is typeset
% like ordinary text.
\newenvironment{advanced}{}{}
% Reads defs.tex. Its contents are not visible from this file; presumably the
% shared notation macros used by the chapters -- TODO confirm.
\input{defs}
% Title-page metadata; \maketitle typesets \title, \author and \date.
\title{Reinforcement Learning: Foundations}
% Besides the date itself, the \date field carries two extra pieces:
%   1. a red work-in-progress notice asking readers for feedback, and
%   2. a blue "Please cite as" BibTeX entry set with \flushleft, whose literal
%      braces are produced in math mode via $\{$ and $\}$.
% NOTE(review): the \url argument reads "[email protected]", which looks like
% an e-mail-obfuscation placeholder rather than a real address -- restore the
% intended contact address.
% NOTE(review): the date says November 2024 while the BibTeX entry says
% year = 2023 -- confirm which one is intended.
\date{November 2024 \\
\textcolor{red}{This book is still work in progress. In particular, references to literature are not complete. We would be grateful for comments, suggestions, omissions, and errors of any kind, at \url{[email protected]}. }\\
\flushleft{Please cite as\\
\textcolor{blue}{
@book$\{$MannorMT-RLbook,\\
url = $\{$https://sites.google.com/view/rlfoundations/home$\}$,\\
author = $\{$Mannor, Shie and Mansour, Yishay and Tamar, Aviv$\}$,\\
title = $\{$Reinforcement Learning: Foundations$\}$,\\
year = $\{$2023$\}$,\\
publisher = $\{$-$\}$\\
$\}$\\
}
}
}
\author{Shie Mannor, Yishay Mansour and Aviv Tamar}
% \shie{...} typesets its argument in blue (presumably to mark one author's
% notes -- verify against its uses in the chapters).
\newcommand{\shie}[1]{\textcolor{blue}{#1}}
% ---------------------------------------------------------------------------
% Front matter: title page and table of contents.
% ---------------------------------------------------------------------------
\begin{document}
\maketitle
\tableofcontents
%
% ---------------------------------------------------------------------------
% Introduction.
% ---------------------------------------------------------------------------
\chapter{Introduction and Overview}\label{chapter:intro}
\input{current_chapters/chapter1-intro}
%
% ---------------------------------------------------------------------------
% Planning chapters. Each chapter is a heading, a label for cross-references,
% and an \input of the file holding the chapter body.
% ---------------------------------------------------------------------------
\chapter{Preface to the Planning Chapters}\label{chapter-planning-preface}
\input{current_chapters/planningpreface}
%
\chapter{Deterministic Decision Processes}\label{chapter:DDP}
\input{current_chapters/chapter2-ddp}
%
\chapter{Markov Chains}\label{chapter:MC}
\input{current_chapters/chapter3-mc}
%
\chapter{Markov Decision Processes and Finite Horizon Dynamic Programming}\label{chapter:MDP-FH}
\input{current_chapters/chapter4-mdp-fh}
%
\chapter{Discounted Markov Decision Processes}\label{chapter:disc}
\input{current_chapters/chapter5-disc}
%
\chapter{Episodic Markov Decision Processes}\label{chapter:episodic}
\input{future_chapters/chapter5-ssp}
%
\chapter{Linear Programming Solutions}\label{chapter-LP}
\input{current_chapters/LP}
% ---------------------------------------------------------------------------
% Learning chapters.
% ---------------------------------------------------------------------------
% Earlier version of the learning preface, kept for reference but disabled:
% \chapter{Preface to the Learning Chapters}\label{chapter-learning-preface}
% \input{future_chapters/chapter-learning-preface}
%
% Current version of the learning preface (note the different label).
\chapter{Preface to the Learning Chapters}\label{ym-chapter-learning-preface}
\input{future_chapters/chapter-learning-preface-final}
%
\chapter{Reinforcement Learning: Model Based}\label{chapter-model-based}
\input{current_chapters/chapter6-mbrl}
%
\chapter{Reinforcement Learning: Model Free}\label{chapter:learning-model-free}
\input{current_chapters/chapter7-mfrl}
%
\chapter{Large State Spaces: Value Function Approximation}\label{chapter:function-approximation}
\input{current_chapters/chapter8-fa}
% Policy-gradient chapter. The title uses the plural "Large State Spaces" so
% that it reads in parallel with the value-function-approximation chapter
% title. The label and the input file are unchanged, so existing
% cross-references keep working.
\chapter{Large State Spaces: Policy Gradient Methods}
\label{chapter:policy-gradient}
\input{future_chapters/PG-merge}
% Multi-arm bandits chapter. The title has no leading blank (a blank inside
% the \chapter argument is carried into the table of contents, the running
% head and the PDF bookmark) and is capitalised like the other chapter titles.
% The label and the input file are unchanged.
\chapter{Multi-Arm Bandits}
\label{chapter:MAB}
\input{current_chapters/chapter10-mab}
% \chapter{Online MDP}
% \label{chapter:OnlineMDP}
% \input{current_chapters/online-MDP}
% \chapter{POMDP}
% \label{chapter:POMDP}
% \input{ym_chapters/Lecture-POMDP}
% \chapter{LQR}
% \label{chapter:LQR}
% \input{ym_chapters/Lecture-LQR}
% \chapter{Generative Model}
% \label{chapter:generative}
% \input{ym_chapters/Lecture-Generative}
% \chapter{Inverse RL and Apprenticeship learning}
% \label{chapter:IRL}
% \input{ym_chapters/Lecture-IRL}
% ---------------------------------------------------------------------------
% Back matter: appendices, then the bibliography.
% ---------------------------------------------------------------------------
% From \appendix onwards, \chapter produces appendix chapters.
\appendix
%
\chapter{Dynamic Programming}\label{chapter:dp}
\input{current_chapters/appendix-dp}
%
\chapter{Ordinary Differential Equations}\label{chapter:ode}
\input{future_chapters/chapter-ODEs}
%
% References: BibTeX style `plain', entries taken from bib-lecture.bib.
\bibliographystyle{plain}
\bibliography{bib-lecture}
\end{document}