% book-sandbox.tex

\documentclass[12pt]{book}

% --- Fonts and encoding -----------------------------------------------------
% T1 encoding with Latin Modern replaces the obsolete `ae' virtual fonts:
% same Computer Modern design, but with real T1 glyphs.
\usepackage[T1]{fontenc}
\usepackage{lmodern}

% --- Graphics and colour ----------------------------------------------------
\usepackage{graphicx}
\usepackage{color}

% --- Mathematics ------------------------------------------------------------
\usepackage{amsmath}
\usepackage{amssymb}

% --- Layout, citations, boxes, sub-floats -----------------------------------
\usepackage{fullpage}
\usepackage{natbib}
\usepackage{framed}
\usepackage{subcaption}
% \usepackage{algorithm}
% \usepackage{algpseudocode}

% \ymignore{...} swallows its argument and typesets nothing.
% Deliberately not starred, so the ignored text may span several paragraphs.
\newcommand{\ymignore}[1]{}

% hyperref is loaded here, after the packages above.
\usepackage{hyperref}

% The `advanced' environment is currently a transparent wrapper (no visual
% effect). The commented variant marks its content with the `leftbar'
% environment from the framed package.
% \newenvironment{advanced}{\begin{leftbar}}{\end{leftbar}}
\newenvironment{advanced}{}{}

% Project-local definitions (contents not visible from this file).
\input{defs}


\title{Reinforcement Learning: Foundations}

% The date field carries, besides the date itself, a work-in-progress notice
% (red) and a suggested BibTeX entry (blue, flush left); \maketitle prints all
% of it together with the date.
% Caution: keep the argument of \date free of blank lines.
% NOTE(review): the date reads November 2024 while the BibTeX entry below says
% year = 2023 -- confirm which year readers should cite.
\date{November 2024 \\
  \textcolor{red}{This book is still a work in progress. In particular, references to literature are not complete. We would be grateful for comments and suggestions, and for reports of omissions and errors of any kind, at \href{mailto:rlfoundationsbook@gmail.com}{\nolinkurl{rlfoundationsbook@gmail.com}}.}
  \begin{flushleft}
    Please cite as\\
    \textcolor{blue}{%
      @book\{MannorMT-RLbook,\\
      url = \{\url{https://sites.google.com/view/rlfoundations/home}\},\\
      author = \{Mannor, Shie and Mansour, Yishay and Tamar, Aviv\},\\
      title = \{Reinforcement Learning: Foundations\},\\
      year = \{2023\},\\
      publisher = \{-\}\\
      \}%
    }
  \end{flushleft}
}

\author{Shie Mannor, Yishay Mansour and Aviv Tamar}


% \shie{text}: typesets its argument in blue.
% NOTE(review): presumably used for Shie's in-text annotations -- confirm
% against the chapter sources.
\newcommand{\shie}[1]{%
  \textcolor{blue}{#1}%
}

\begin{document}
\maketitle

\tableofcontents


% ============================================================================
% Introduction
% ============================================================================

\chapter{Introduction and Overview}
\label{chapter:intro}
\input{current_chapters/chapter1-intro}

% ============================================================================
% Planning chapters
% ============================================================================

\chapter{Preface to the Planning Chapters}
\label{chapter-planning-preface}
\input{current_chapters/planningpreface}

\chapter{Deterministic Decision Processes}
\label{chapter:DDP}
\input{current_chapters/chapter2-ddp}

\chapter{Markov Chains}
\label{chapter:MC}
\input{current_chapters/chapter3-mc}

\chapter{Markov Decision Processes and Finite Horizon Dynamic Programming}
\label{chapter:MDP-FH}
\input{current_chapters/chapter4-mdp-fh}

\chapter{Discounted Markov Decision Processes}
\label{chapter:disc}
\input{current_chapters/chapter5-disc}

\chapter{Episodic Markov Decision Processes}
\label{chapter:episodic}
\input{future_chapters/chapter5-ssp}

\chapter{Linear Programming Solutions}\label{chapter-LP}
\input{current_chapters/LP}

% ============================================================================
% Learning chapters
% ============================================================================

% \chapter{Preface to the Learning Chapters}\label{chapter-learning-preface}
% \input{future_chapters/chapter-learning-preface}

\chapter{Preface to the Learning Chapters}\label{ym-chapter-learning-preface}
\input{future_chapters/chapter-learning-preface-final}

\chapter{Reinforcement Learning: Model Based}\label{chapter-model-based}
\input{current_chapters/chapter6-mbrl}

\chapter{Reinforcement Learning: Model Free}
\label{chapter:learning-model-free}
\input{current_chapters/chapter7-mfrl}

\chapter{Large State Spaces: Value Function Approximation}
\label{chapter:function-approximation}
\input{current_chapters/chapter8-fa}

% Title wording kept parallel to the previous chapter ("Large State Spaces").
\chapter{Large State Spaces: Policy Gradient Methods}
\label{chapter:policy-gradient}
\input{future_chapters/PG-merge}

% No leading space inside the title: it would end up in the table of contents
% entry and the running head.
\chapter{Multi-Armed Bandits}
\label{chapter:MAB}
\input{current_chapters/chapter10-mab}

% ============================================================================
% Chapters currently disabled
% ============================================================================

% \chapter{Online MDP}
% \label{chapter:OnlineMDP}
% \input{current_chapters/online-MDP}

% \chapter{POMDP}
% \label{chapter:POMDP}
% \input{ym_chapters/Lecture-POMDP}

% \chapter{LQR}
% \label{chapter:LQR}
% \input{ym_chapters/Lecture-LQR}

% \chapter{Generative Model}
% \label{chapter:generative}
% \input{ym_chapters/Lecture-Generative}

% \chapter{Inverse RL and Apprenticeship learning}
% \label{chapter:IRL}
% \input{ym_chapters/Lecture-IRL}

% ============================================================================
% Appendices
% ============================================================================

\appendix

\chapter{Dynamic Programming}
\label{chapter:dp}
\input{current_chapters/appendix-dp}

\chapter{Ordinary Differential Equations}
\label{chapter:ode}
\input{future_chapters/chapter-ODEs}


% ============================================================================
% Bibliography
% ============================================================================
% NOTE(review): natbib is loaded in the preamble, but `plain' is not a
% natbib-aware style, so citations come out numeric. If the chapters rely on
% author--year forms (\citet/\citep), consider `plainnat' -- confirm first.
\bibliographystyle{plain}
\bibliography{bib-lecture}
\end{document}