From 97ad92ab2d5be1c8f5126abddd04dfded4a059c5 Mon Sep 17 00:00:00 2001
From: Ritchie Vink
Date: Thu, 26 Oct 2023 07:58:01 +0200
Subject: [PATCH] chore(rust): inline parquet2 (#12026)

---
 .../logical/categorical/string_cache.rs | 3 +-
 crates/polars-core/src/datatypes/aliases.rs | 57 +-
 crates/polars-core/src/datatypes/mod.rs | 1 -
 crates/polars-error/Cargo.toml | 1 -
 crates/polars-error/src/lib.rs | 7 -
 crates/polars-parquet/Cargo.toml | 34 +-
 crates/polars-parquet/LICENSE | 2 +-
 crates/polars-parquet/src/arrow/mod.rs | 2 +-
 .../arrow/read/deserialize/binary/basic.rs | 8 +-
 .../read/deserialize/binary/dictionary.rs | 2 +-
 .../arrow/read/deserialize/binary/nested.rs | 6 +-
 .../arrow/read/deserialize/boolean/basic.rs | 8 +-
 .../arrow/read/deserialize/boolean/nested.rs | 6 +-
 .../arrow/read/deserialize/dictionary/mod.rs | 10 +-
 .../read/deserialize/dictionary/nested.rs | 8 +-
 .../deserialize/fixed_size_binary/basic.rs | 8 +-
 .../fixed_size_binary/dictionary.rs | 2 +-
 .../deserialize/fixed_size_binary/nested.rs | 6 +-
 .../src/arrow/read/deserialize/mod.rs | 4 +-
 .../src/arrow/read/deserialize/nested.rs | 2 +-
 .../arrow/read/deserialize/nested_utils.rs | 6 +-
 .../src/arrow/read/deserialize/null/mod.rs | 12 +-
 .../src/arrow/read/deserialize/null/nested.rs | 2 +-
 .../arrow/read/deserialize/primitive/basic.rs | 10 +-
 .../read/deserialize/primitive/dictionary.rs | 4 +-
 .../read/deserialize/primitive/integer.rs | 12 +-
 .../read/deserialize/primitive/nested.rs | 8 +-
 .../src/arrow/read/deserialize/simple.rs | 8 +-
 .../src/arrow/read/deserialize/utils.rs | 14 +-
 crates/polars-parquet/src/arrow/read/file.rs | 2 +-
 .../src/arrow/read/indexes/binary.rs | 2 +-
 .../src/arrow/read/indexes/boolean.rs | 2 +-
 .../arrow/read/indexes/fixed_len_binary.rs | 2 +-
 .../src/arrow/read/indexes/mod.rs | 22 +-
 .../src/arrow/read/indexes/primitive.rs | 8 +-
 crates/polars-parquet/src/arrow/read/mod.rs | 20 +-
 .../src/arrow/read/row_group.rs | 14 +-
 .../src/arrow/read/schema/convert.rs | 10 +-
 .../src/arrow/read/schema/metadata.rs | 2 +-
 .../src/arrow/read/schema/mod.rs | 4 +-
 .../src/arrow/read/statistics/binary.rs | 3 +-
 .../src/arrow/read/statistics/boolean.rs | 3 +-
 .../src/arrow/read/statistics/fixlen.rs | 2 +-
 .../src/arrow/read/statistics/mod.rs | 13 +-
 .../src/arrow/read/statistics/primitive.rs | 7 +-
 .../src/arrow/read/statistics/utf8.rs | 3 +-
 .../src/arrow/write/binary/basic.rs | 10 +-
 .../src/arrow/write/binary/nested.rs | 6 +-
 .../src/arrow/write/boolean/basic.rs | 14 +-
 .../src/arrow/write/boolean/nested.rs | 6 +-
 .../src/arrow/write/dictionary.rs | 12 +-
 crates/polars-parquet/src/arrow/write/file.rs | 8 +-
 .../src/arrow/write/fixed_len_bytes.rs | 8 +-
 crates/polars-parquet/src/arrow/write/mod.rs | 21 +-
 .../src/arrow/write/nested/mod.rs | 6 +-
 .../polars-parquet/src/arrow/write/pages.rs | 12 +-
 .../src/arrow/write/primitive/basic.rs | 12 +-
 .../src/arrow/write/primitive/nested.rs | 10 +-
 .../src/arrow/write/row_group.rs | 8 +-
 .../polars-parquet/src/arrow/write/schema.rs | 12 +-
 crates/polars-parquet/src/arrow/write/sink.rs | 4 +-
 .../src/arrow/write/utf8/basic.rs | 10 +-
 .../src/arrow/write/utf8/nested.rs | 6 +-
 .../polars-parquet/src/arrow/write/utils.rs | 14 +-
 crates/polars-parquet/src/lib.rs | 1 +
 .../src/parquet/bloom_filter/hash.rs | 17 +
 .../src/parquet/bloom_filter/mod.rs | 71 +
 .../src/parquet/bloom_filter/read.rs | 51 +
 .../src/parquet/bloom_filter/split_block.rs | 82 ++
 .../polars-parquet/src/parquet/compression.rs | 385 ++++++
 .../src/parquet/deserialize/binary.rs | 70 +
.../src/parquet/deserialize/boolean.rs | 39 + .../src/parquet/deserialize/filtered_rle.rs | 274 ++++ .../src/parquet/deserialize/fixed_len.rs | 107 ++ .../src/parquet/deserialize/hybrid_rle.rs | 204 +++ .../src/parquet/deserialize/mod.rs | 17 + .../src/parquet/deserialize/native.rs | 97 ++ .../src/parquet/deserialize/utils.rs | 174 +++ .../src/parquet/encoding/bitpacked/decode.rs | 211 +++ .../src/parquet/encoding/bitpacked/encode.rs | 54 + .../src/parquet/encoding/bitpacked/mod.rs | 220 ++++ .../src/parquet/encoding/bitpacked/pack.rs | 108 ++ .../src/parquet/encoding/bitpacked/unpack.rs | 137 ++ .../encoding/delta_bitpacked/decoder.rs | 362 +++++ .../encoding/delta_bitpacked/encoder.rs | 122 ++ .../parquet/encoding/delta_bitpacked/mod.rs | 90 ++ .../encoding/delta_byte_array/decoder.rs | 106 ++ .../encoding/delta_byte_array/encoder.rs | 32 + .../parquet/encoding/delta_byte_array/mod.rs | 33 + .../delta_length_byte_array/decoder.rs | 80 ++ .../delta_length_byte_array/encoder.rs | 19 + .../encoding/delta_length_byte_array/mod.rs | 50 + .../src/parquet/encoding/hybrid_rle/bitmap.rs | 102 ++ .../parquet/encoding/hybrid_rle/decoder.rs | 142 ++ .../parquet/encoding/hybrid_rle/encoder.rs | 166 +++ .../src/parquet/encoding/hybrid_rle/mod.rs | 263 ++++ .../src/parquet/encoding/mod.rs | 27 + .../src/parquet/encoding/plain_byte_array.rs | 46 + .../src/parquet/encoding/uleb128.rs | 97 ++ .../src/parquet/encoding/zigzag_leb128.rs | 69 + crates/polars-parquet/src/parquet/error.rs | 134 ++ .../src/parquet/indexes/index.rs | 322 +++++ .../src/parquet/indexes/intervals.rs | 137 ++ .../polars-parquet/src/parquet/indexes/mod.rs | 234 ++++ .../parquet/metadata/column_chunk_metadata.rs | 210 +++ .../src/parquet/metadata/column_descriptor.rs | 50 + .../src/parquet/metadata/column_order.rs | 30 + .../src/parquet/metadata/file_metadata.rs | 129 ++ .../src/parquet/metadata/mod.rs | 17 + .../src/parquet/metadata/row_metadata.rs | 103 ++ .../src/parquet/metadata/schema_descriptor.rs | 141 ++ .../src/parquet/metadata/sort.rs | 94 ++ crates/polars-parquet/src/parquet/mod.rs | 37 + crates/polars-parquet/src/parquet/page/mod.rs | 428 ++++++ .../src/parquet/parquet_bridge.rs | 704 ++++++++++ .../src/parquet/read/column/mod.rs | 204 +++ .../src/parquet/read/column/stream.rs | 51 + .../src/parquet/read/compression.rs | 286 ++++ .../src/parquet/read/indexes/deserialize.rs | 27 + .../src/parquet/read/indexes/mod.rs | 4 + .../src/parquet/read/indexes/read.rs | 131 ++ .../polars-parquet/src/parquet/read/levels.rs | 27 + .../src/parquet/read/metadata.rs | 101 ++ crates/polars-parquet/src/parquet/read/mod.rs | 237 ++++ .../src/parquet/read/page/indexed_reader.rs | 204 +++ .../src/parquet/read/page/mod.rs | 18 + .../src/parquet/read/page/reader.rs | 306 +++++ .../src/parquet/read/page/stream.rs | 138 ++ .../polars-parquet/src/parquet/read/stream.rs | 88 ++ .../parquet/schema/io_message/from_message.rs | 1159 +++++++++++++++++ .../src/parquet/schema/io_message/mod.rs | 3 + .../parquet/schema/io_thrift/from_thrift.rs | 134 ++ .../src/parquet/schema/io_thrift/mod.rs | 85 ++ .../src/parquet/schema/io_thrift/to_thrift.rs | 82 ++ .../polars-parquet/src/parquet/schema/mod.rs | 7 + .../src/parquet/schema/types/basic_type.rs | 16 + .../parquet/schema/types/converted_type.rs | 238 ++++ .../src/parquet/schema/types/mod.rs | 17 + .../src/parquet/schema/types/parquet_type.rs | 206 +++ .../src/parquet/schema/types/physical_type.rs | 58 + .../src/parquet/schema/types/spec.rs | 181 +++ .../src/parquet/statistics/binary.rs | 51 + 
.../src/parquet/statistics/boolean.rs | 72 + .../parquet/statistics/fixed_len_binary.rs | 76 ++ .../src/parquet/statistics/mod.rs | 134 ++ .../src/parquet/statistics/primitive.rs | 70 + crates/polars-parquet/src/parquet/types.rs | 141 ++ .../src/parquet/write/column_chunk.rs | 208 +++ .../src/parquet/write/compression.rs | 160 +++ .../src/parquet/write/dyn_iter.rs | 65 + .../polars-parquet/src/parquet/write/file.rs | 279 ++++ .../src/parquet/write/indexes/mod.rs | 4 + .../src/parquet/write/indexes/serialize.rs | 78 ++ .../src/parquet/write/indexes/write.rs | 46 + .../polars-parquet/src/parquet/write/mod.rs | 57 + .../polars-parquet/src/parquet/write/page.rs | 243 ++++ .../src/parquet/write/row_group.rs | 200 +++ .../src/parquet/write/statistics.rs | 323 +++++ .../src/parquet/write/stream.rs | 192 +++ crates/polars-utils/Cargo.toml | 1 + crates/polars-utils/src/aliases.rs | 57 + py-polars/Cargo.lock | 32 +- 162 files changed, 13635 insertions(+), 308 deletions(-) create mode 100644 crates/polars-parquet/src/parquet/bloom_filter/hash.rs create mode 100644 crates/polars-parquet/src/parquet/bloom_filter/mod.rs create mode 100644 crates/polars-parquet/src/parquet/bloom_filter/read.rs create mode 100644 crates/polars-parquet/src/parquet/bloom_filter/split_block.rs create mode 100644 crates/polars-parquet/src/parquet/compression.rs create mode 100644 crates/polars-parquet/src/parquet/deserialize/binary.rs create mode 100644 crates/polars-parquet/src/parquet/deserialize/boolean.rs create mode 100644 crates/polars-parquet/src/parquet/deserialize/filtered_rle.rs create mode 100644 crates/polars-parquet/src/parquet/deserialize/fixed_len.rs create mode 100644 crates/polars-parquet/src/parquet/deserialize/hybrid_rle.rs create mode 100644 crates/polars-parquet/src/parquet/deserialize/mod.rs create mode 100644 crates/polars-parquet/src/parquet/deserialize/native.rs create mode 100644 crates/polars-parquet/src/parquet/deserialize/utils.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/bitpacked/decode.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/bitpacked/encode.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/bitpacked/mod.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/bitpacked/pack.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/bitpacked/unpack.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/delta_bitpacked/encoder.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/delta_byte_array/decoder.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/delta_byte_array/encoder.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/delta_byte_array/mod.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/decoder.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/encoder.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/mod.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/hybrid_rle/bitmap.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/hybrid_rle/decoder.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/hybrid_rle/mod.rs create mode 100644 
crates/polars-parquet/src/parquet/encoding/mod.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/plain_byte_array.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/uleb128.rs create mode 100644 crates/polars-parquet/src/parquet/encoding/zigzag_leb128.rs create mode 100644 crates/polars-parquet/src/parquet/error.rs create mode 100644 crates/polars-parquet/src/parquet/indexes/index.rs create mode 100644 crates/polars-parquet/src/parquet/indexes/intervals.rs create mode 100644 crates/polars-parquet/src/parquet/indexes/mod.rs create mode 100644 crates/polars-parquet/src/parquet/metadata/column_chunk_metadata.rs create mode 100644 crates/polars-parquet/src/parquet/metadata/column_descriptor.rs create mode 100644 crates/polars-parquet/src/parquet/metadata/column_order.rs create mode 100644 crates/polars-parquet/src/parquet/metadata/file_metadata.rs create mode 100644 crates/polars-parquet/src/parquet/metadata/mod.rs create mode 100644 crates/polars-parquet/src/parquet/metadata/row_metadata.rs create mode 100644 crates/polars-parquet/src/parquet/metadata/schema_descriptor.rs create mode 100644 crates/polars-parquet/src/parquet/metadata/sort.rs create mode 100644 crates/polars-parquet/src/parquet/mod.rs create mode 100644 crates/polars-parquet/src/parquet/page/mod.rs create mode 100644 crates/polars-parquet/src/parquet/parquet_bridge.rs create mode 100644 crates/polars-parquet/src/parquet/read/column/mod.rs create mode 100644 crates/polars-parquet/src/parquet/read/column/stream.rs create mode 100644 crates/polars-parquet/src/parquet/read/compression.rs create mode 100644 crates/polars-parquet/src/parquet/read/indexes/deserialize.rs create mode 100644 crates/polars-parquet/src/parquet/read/indexes/mod.rs create mode 100644 crates/polars-parquet/src/parquet/read/indexes/read.rs create mode 100644 crates/polars-parquet/src/parquet/read/levels.rs create mode 100644 crates/polars-parquet/src/parquet/read/metadata.rs create mode 100644 crates/polars-parquet/src/parquet/read/mod.rs create mode 100644 crates/polars-parquet/src/parquet/read/page/indexed_reader.rs create mode 100644 crates/polars-parquet/src/parquet/read/page/mod.rs create mode 100644 crates/polars-parquet/src/parquet/read/page/reader.rs create mode 100644 crates/polars-parquet/src/parquet/read/page/stream.rs create mode 100644 crates/polars-parquet/src/parquet/read/stream.rs create mode 100644 crates/polars-parquet/src/parquet/schema/io_message/from_message.rs create mode 100644 crates/polars-parquet/src/parquet/schema/io_message/mod.rs create mode 100644 crates/polars-parquet/src/parquet/schema/io_thrift/from_thrift.rs create mode 100644 crates/polars-parquet/src/parquet/schema/io_thrift/mod.rs create mode 100644 crates/polars-parquet/src/parquet/schema/io_thrift/to_thrift.rs create mode 100644 crates/polars-parquet/src/parquet/schema/mod.rs create mode 100644 crates/polars-parquet/src/parquet/schema/types/basic_type.rs create mode 100644 crates/polars-parquet/src/parquet/schema/types/converted_type.rs create mode 100644 crates/polars-parquet/src/parquet/schema/types/mod.rs create mode 100644 crates/polars-parquet/src/parquet/schema/types/parquet_type.rs create mode 100644 crates/polars-parquet/src/parquet/schema/types/physical_type.rs create mode 100644 crates/polars-parquet/src/parquet/schema/types/spec.rs create mode 100644 crates/polars-parquet/src/parquet/statistics/binary.rs create mode 100644 crates/polars-parquet/src/parquet/statistics/boolean.rs create mode 100644 
crates/polars-parquet/src/parquet/statistics/fixed_len_binary.rs create mode 100644 crates/polars-parquet/src/parquet/statistics/mod.rs create mode 100644 crates/polars-parquet/src/parquet/statistics/primitive.rs create mode 100644 crates/polars-parquet/src/parquet/types.rs create mode 100644 crates/polars-parquet/src/parquet/write/column_chunk.rs create mode 100644 crates/polars-parquet/src/parquet/write/compression.rs create mode 100644 crates/polars-parquet/src/parquet/write/dyn_iter.rs create mode 100644 crates/polars-parquet/src/parquet/write/file.rs create mode 100644 crates/polars-parquet/src/parquet/write/indexes/mod.rs create mode 100644 crates/polars-parquet/src/parquet/write/indexes/serialize.rs create mode 100644 crates/polars-parquet/src/parquet/write/indexes/write.rs create mode 100644 crates/polars-parquet/src/parquet/write/mod.rs create mode 100644 crates/polars-parquet/src/parquet/write/page.rs create mode 100644 crates/polars-parquet/src/parquet/write/row_group.rs create mode 100644 crates/polars-parquet/src/parquet/write/statistics.rs create mode 100644 crates/polars-parquet/src/parquet/write/stream.rs diff --git a/crates/polars-core/src/chunked_array/logical/categorical/string_cache.rs b/crates/polars-core/src/chunked_array/logical/categorical/string_cache.rs index f39a1523446c..f0bcbafca52f 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/string_cache.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/string_cache.rs @@ -7,9 +7,8 @@ use hashbrown::hash_map::RawEntryMut; use once_cell::sync::Lazy; use smartstring::{LazyCompact, SmartString}; -use crate::datatypes::PlIdHashMap; +use crate::datatypes::{InitHashMaps2, PlIdHashMap}; use crate::hashing::_HASHMAP_INIT_SIZE; -use crate::prelude::InitHashMaps; /// We use atomic reference counting to determine how many threads use the /// string cache. If the refcount is zero, we may clear the string cache. 
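The string-cache hunk above swaps its import to `InitHashMaps2`, and the aliases hunk that follows moves the generic `InitHashMaps` constructors into `polars-utils`, keeping only the `IdBuildHasher`-backed `PlIdHashMap` impl in polars-core. A minimal, self-contained sketch of that constructor-trait pattern, using std's `HashMap` with a stand-in hasher (the type and trait names in the sketch are illustrative, not the crate's API):

```rust
use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap;
use std::hash::BuildHasherDefault;

// Stand-ins for the non-default build-hashers behind the Polars aliases
// (`PlHashMap`, `PlIdHashMap`, ...); everything here is illustrative.
type IdLikeBuildHasher = BuildHasherDefault<DefaultHasher>;
type MyHashMap<K, V> = HashMap<K, V, IdLikeBuildHasher>;

// Same shape as the `InitHashMaps`/`InitHashMaps2` traits in this patch:
// uniform constructors for maps whose hasher is not the std default.
trait InitHashMaps {
    type HashMap;
    fn new() -> Self::HashMap;
    fn with_capacity(capacity: usize) -> Self::HashMap;
}

impl<K, V> InitHashMaps for MyHashMap<K, V> {
    type HashMap = Self;

    fn new() -> Self {
        Self::with_capacity_and_hasher(0, Default::default())
    }

    fn with_capacity(capacity: usize) -> Self {
        Self::with_capacity_and_hasher(capacity, Default::default())
    }
}

fn main() {
    // Resolves to the trait constructor: the inherent `with_capacity` is only
    // provided for maps using the default hasher, hence the helper trait.
    let mut m = MyHashMap::<&str, u32>::with_capacity(8);
    m.insert("threads_using_string_cache", 0);
    assert_eq!(m["threads_using_string_cache"], 0);
}
```

The trait exists because `with_capacity`/`new` are only defined inherently for the default hasher, so type aliases built on custom hashers get their constructors from a trait instead.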
diff --git a/crates/polars-core/src/datatypes/aliases.rs b/crates/polars-core/src/datatypes/aliases.rs index 421e7cd3c8a4..d5ce2da0974b 100644 --- a/crates/polars-core/src/datatypes/aliases.rs +++ b/crates/polars-core/src/datatypes/aliases.rs @@ -1,4 +1,5 @@ pub use arrow::legacy::index::{IdxArr, IdxSize}; +pub use polars_utils::aliases::{InitHashMaps, PlHashMap, PlHashSet, PlIndexMap, PlIndexSet}; use super::*; use crate::hashing::IdBuildHasher; @@ -21,14 +22,10 @@ pub type IdxType = UInt32Type; #[cfg(feature = "bigidx")] pub type IdxType = UInt64Type; -pub type PlHashMap = hashbrown::HashMap; -/// This hashmap has the uses an IdHasher +/// This hashmap uses an IdHasher pub type PlIdHashMap = hashbrown::HashMap; -pub type PlHashSet = hashbrown::HashSet; -pub type PlIndexMap = indexmap::IndexMap; -pub type PlIndexSet = indexmap::IndexSet; -pub trait InitHashMaps { +pub trait InitHashMaps2 { type HashMap; fn new() -> Self::HashMap; @@ -36,53 +33,7 @@ pub trait InitHashMaps { fn with_capacity(capacity: usize) -> Self::HashMap; } -impl InitHashMaps for PlHashMap { - type HashMap = Self; - - fn new() -> Self::HashMap { - Self::with_capacity_and_hasher(0, Default::default()) - } - - fn with_capacity(capacity: usize) -> Self { - Self::with_capacity_and_hasher(capacity, Default::default()) - } -} -impl InitHashMaps for PlHashSet { - type HashMap = Self; - - fn new() -> Self::HashMap { - Self::with_capacity_and_hasher(0, Default::default()) - } - - fn with_capacity(capacity: usize) -> Self { - Self::with_capacity_and_hasher(capacity, Default::default()) - } -} - -impl InitHashMaps for PlIndexSet { - type HashMap = Self; - - fn new() -> Self::HashMap { - Self::with_capacity_and_hasher(0, Default::default()) - } - - fn with_capacity(capacity: usize) -> Self::HashMap { - Self::with_capacity_and_hasher(capacity, Default::default()) - } -} - -impl InitHashMaps for PlIndexMap { - type HashMap = Self; - - fn new() -> Self::HashMap { - Self::with_capacity_and_hasher(0, Default::default()) - } - - fn with_capacity(capacity: usize) -> Self::HashMap { - Self::with_capacity_and_hasher(capacity, Default::default()) - } -} -impl InitHashMaps for PlIdHashMap { +impl InitHashMaps2 for PlIdHashMap { type HashMap = Self; fn new() -> Self::HashMap { diff --git a/crates/polars-core/src/datatypes/mod.rs b/crates/polars-core/src/datatypes/mod.rs index a5f1ef643070..9151f6915037 100644 --- a/crates/polars-core/src/datatypes/mod.rs +++ b/crates/polars-core/src/datatypes/mod.rs @@ -21,7 +21,6 @@ use std::fmt::{Display, Formatter}; use std::hash::{Hash, Hasher}; use std::ops::{Add, AddAssign, Div, Mul, Rem, Sub, SubAssign}; -use ahash::RandomState; pub use aliases::*; pub use any_value::*; use arrow::compute::comparison::Simd8; diff --git a/crates/polars-error/Cargo.toml b/crates/polars-error/Cargo.toml index 60e4800f073f..64b81ed950e6 100644 --- a/crates/polars-error/Cargo.toml +++ b/crates/polars-error/Cargo.toml @@ -12,7 +12,6 @@ description = "Error definitions for the Polars DataFrame library" arrow-format = { version = "0.8.1", optional = true } avro-schema = { workspace = true, optional = true } object_store = { workspace = true, optional = true } -parquet2 = { workspace = true, optional = true } regex = { workspace = true, optional = true } simdutf8 = { workspace = true } thiserror = { workspace = true } diff --git a/crates/polars-error/src/lib.rs b/crates/polars-error/src/lib.rs index be7a72debe32..c44131717b81 100644 --- a/crates/polars-error/src/lib.rs +++ b/crates/polars-error/src/lib.rs @@ -91,13 +91,6 @@ impl 
From for PolarsError { } } -#[cfg(feature = "parquet2")] -impl From for PolarsError { - fn from(err: parquet2::error::Error) -> Self { - polars_err!(ComputeError: "parquet error: {err:?}") - } -} - #[cfg(feature = "avro-schema")] impl From for PolarsError { fn from(value: avro_schema::error::Error) -> Self { diff --git a/crates/polars-parquet/Cargo.toml b/crates/polars-parquet/Cargo.toml index 673d000740ac..f1b8791c8d70 100644 --- a/crates/polars-parquet/Cargo.toml +++ b/crates/polars-parquet/Cargo.toml @@ -20,14 +20,26 @@ ethnum = { workspace = true } fallible-streaming-iterator = { workspace = true, optional = true } futures = { workspace = true, optional = true } num-traits = { workspace = true } -parquet2 = { workspace = true, optional = true, default-features = true, features = ["async"] } -polars-error = { workspace = true, features = ["parquet2"] } +polars-error = { workspace = true } +polars-utils = { workspace = true } simdutf8 = { workspace = true } -[features] -bloom_filter = ["parquet2/bloom_filter"] -async = ["futures"] +parquet-format-safe = "0.2" +seq-macro = { version = "0.3", default-features = false } +streaming-decompression = "0.1" + +async-stream = { version = "0.3.3", optional = true } + +brotli = { version = "^3.3", optional = true } +flate2 = { version = "^1.0", optional = true, default-features = false } +lz4 = { version = "1.24", optional = true } +serde = { version = "^1.0", optional = true, features = ["derive"] } +snap = { version = "^1.1", optional = true } +zstd = { version = "^0.12", optional = true, default-features = false } +xxhash-rust = { version = "0.8", optional = true, features = ["xxh64"] } + +[features] compression = [ "zstd", "gzip", @@ -37,8 +49,10 @@ compression = [ ] # compression backends -zstd = ["parquet2/zstd"] -snappy = ["parquet2/snappy"] -gzip = ["parquet2/gzip"] -lz4 = ["parquet2/lz4"] -brotli = ["parquet2/brotli"] +snappy = ["snap"] +gzip = ["flate2/rust_backend"] +gzip_zlib_ng = ["flate2/zlib-ng"] + +async = ["async-stream", "futures", "parquet-format-safe/async"] +bloom_filter = ["xxhash-rust"] +serde_types = ["serde"] diff --git a/crates/polars-parquet/LICENSE b/crates/polars-parquet/LICENSE index a4b4b70523c3..7fd76611dd29 100644 --- a/crates/polars-parquet/LICENSE +++ b/crates/polars-parquet/LICENSE @@ -1,5 +1,5 @@ Some of the code in this crate is subject to the Apache 2 license below, as it -was taken from the arrow2 Rust crate in October 2023. Later changes are subject +was taken from the arrow2 and parquet2 Rust crate in October 2023. Later changes are subject to the MIT license in ../../LICENSE. 
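With parquet2 inlined, the Cargo.toml hunk above adds the compression backends (`snap`, `flate2`, `lz4`, `brotli`, `zstd`) as direct optional dependencies and maps them to crate features, instead of forwarding to `parquet2/*` features. A hedged sketch of how such feature-gated backends are typically dispatched; this is illustrative only, not the contents of the new `src/parquet/compression.rs`, and the enum and function below are assumed names:

```rust
/// Illustrative only: feature-gated dispatch over an optional compression
/// backend, mirroring the `snappy = ["snap"]` feature mapping declared above.
pub enum Compression {
    Uncompressed,
    Snappy,
}

pub fn compress(codec: Compression, input: &[u8], output: &mut Vec<u8>) -> Result<(), String> {
    match codec {
        Compression::Uncompressed => {
            output.extend_from_slice(input);
            Ok(())
        },
        #[cfg(feature = "snappy")]
        Compression::Snappy => {
            // The `snap` crate is only compiled in when the `snappy` feature is enabled.
            let start = output.len();
            output.resize(start + snap::raw::max_compress_len(input.len()), 0);
            let n = snap::raw::Encoder::new()
                .compress(input, &mut output[start..])
                .map_err(|e| e.to_string())?;
            output.truncate(start + n);
            Ok(())
        },
        #[cfg(not(feature = "snappy"))]
        Compression::Snappy => Err("enable the `snappy` feature to compress with snappy".to_string()),
    }
}

fn main() {
    let mut out = Vec::new();
    compress(Compression::Uncompressed, b"parquet page bytes", &mut out).unwrap();
    assert_eq!(out, b"parquet page bytes");
}
```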
diff --git a/crates/polars-parquet/src/arrow/mod.rs b/crates/polars-parquet/src/arrow/mod.rs index 1ccb35dfeccf..aff9a98c9670 100644 --- a/crates/polars-parquet/src/arrow/mod.rs +++ b/crates/polars-parquet/src/arrow/mod.rs @@ -3,6 +3,6 @@ pub mod write; #[cfg(feature = "io_parquet_bloom_filter")] #[cfg_attr(docsrs, doc(cfg(feature = "io_parquet_bloom_filter")))] -pub use parquet2::bloom_filter; +pub use crate::parquet::bloom_filter; const ARROW_SCHEMA_META_KEY: &str = "ARROW:schema"; diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binary/basic.rs b/crates/polars-parquet/src/arrow/read/deserialize/binary/basic.rs index 902fa69d0031..8bde2ce8ea46 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binary/basic.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binary/basic.rs @@ -5,10 +5,6 @@ use arrow::array::{Array, BinaryArray, Utf8Array}; use arrow::bitmap::MutableBitmap; use arrow::datatypes::{DataType, PhysicalType}; use arrow::offset::Offset; -use parquet2::deserialize::SliceFilteredIter; -use parquet2::encoding::{delta_length_byte_array, hybrid_rle, Encoding}; -use parquet2::page::{split_buffer, DataPage, DictPage}; -use parquet2::schema::Repetition; use polars_error::{to_compute_err, PolarsResult}; use super::super::utils::{ @@ -17,6 +13,10 @@ use super::super::utils::{ }; use super::super::{utils, Pages}; use super::utils::*; +use crate::parquet::deserialize::SliceFilteredIter; +use crate::parquet::encoding::{delta_length_byte_array, hybrid_rle, Encoding}; +use crate::parquet::page::{split_buffer, DataPage, DictPage}; +use crate::parquet::schema::Repetition; #[derive(Debug)] pub(super) struct Required<'a> { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binary/dictionary.rs b/crates/polars-parquet/src/arrow/read/deserialize/binary/dictionary.rs index d4c91dbc5d72..20548f201b46 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binary/dictionary.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binary/dictionary.rs @@ -4,7 +4,6 @@ use arrow::array::{Array, BinaryArray, DictionaryArray, DictionaryKey, Utf8Array use arrow::bitmap::MutableBitmap; use arrow::datatypes::{DataType, PhysicalType}; use arrow::offset::Offset; -use parquet2::page::DictPage; use polars_error::PolarsResult; use super::super::dictionary::*; @@ -12,6 +11,7 @@ use super::super::utils::MaybeNext; use super::super::Pages; use super::utils::{Binary, SizedBinaryIter}; use crate::arrow::read::deserialize::nested_utils::{InitNested, NestedState}; +use crate::parquet::page::DictPage; /// An iterator adapter over [`Pages`] assumed to be encoded as parquet's dictionary-encoded binary representation #[derive(Debug)] diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binary/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/binary/nested.rs index 37c0a35006f6..750d81dae1b6 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binary/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binary/nested.rs @@ -4,9 +4,6 @@ use arrow::array::Array; use arrow::bitmap::MutableBitmap; use arrow::datatypes::DataType; use arrow::offset::Offset; -use parquet2::encoding::Encoding; -use parquet2::page::{split_buffer, DataPage, DictPage}; -use parquet2::schema::Repetition; use polars_error::PolarsResult; use super::super::nested_utils::*; @@ -15,6 +12,9 @@ use super::super::utils::MaybeNext; use super::basic::{deserialize_plain, finish, Dict, ValuesDictionary}; use super::utils::*; use crate::arrow::read::Pages; +use 
crate::parquet::encoding::Encoding; +use crate::parquet::page::{split_buffer, DataPage, DictPage}; +use crate::parquet::schema::Repetition; #[derive(Debug)] enum State<'a> { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/boolean/basic.rs b/crates/polars-parquet/src/arrow/read/deserialize/boolean/basic.rs index 413cfd15da35..1c736d6d8a8c 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/boolean/basic.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/boolean/basic.rs @@ -4,10 +4,6 @@ use arrow::array::BooleanArray; use arrow::bitmap::utils::BitmapIter; use arrow::bitmap::MutableBitmap; use arrow::datatypes::DataType; -use parquet2::deserialize::SliceFilteredIter; -use parquet2::encoding::Encoding; -use parquet2::page::{split_buffer, DataPage, DictPage}; -use parquet2::schema::Repetition; use polars_error::PolarsResult; use super::super::utils::{ @@ -15,6 +11,10 @@ use super::super::utils::{ FilteredOptionalPageValidity, MaybeNext, OptionalPageValidity, }; use super::super::{utils, Pages}; +use crate::parquet::deserialize::SliceFilteredIter; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::{split_buffer, DataPage, DictPage}; +use crate::parquet::schema::Repetition; #[derive(Debug)] struct Values<'a>(BitmapIter<'a>); diff --git a/crates/polars-parquet/src/arrow/read/deserialize/boolean/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/boolean/nested.rs index d3a8c0b305c4..e10a6dfb2d2e 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/boolean/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/boolean/nested.rs @@ -4,14 +4,14 @@ use arrow::array::BooleanArray; use arrow::bitmap::utils::BitmapIter; use arrow::bitmap::MutableBitmap; use arrow::datatypes::DataType; -use parquet2::encoding::Encoding; -use parquet2::page::{split_buffer, DataPage, DictPage}; -use parquet2::schema::Repetition; use polars_error::PolarsResult; use super::super::nested_utils::*; use super::super::utils::MaybeNext; use super::super::{utils, Pages}; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::{split_buffer, DataPage, DictPage}; +use crate::parquet::schema::Repetition; // The state of a `DataPage` of `Boolean` parquet boolean type #[allow(clippy::large_enum_variant)] diff --git a/crates/polars-parquet/src/arrow/read/deserialize/dictionary/mod.rs b/crates/polars-parquet/src/arrow/read/deserialize/dictionary/mod.rs index acb795eb04d4..9bd21d9946e1 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/dictionary/mod.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/dictionary/mod.rs @@ -5,17 +5,17 @@ use std::collections::VecDeque; use arrow::array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray}; use arrow::bitmap::MutableBitmap; use arrow::datatypes::DataType; -use parquet2::deserialize::SliceFilteredIter; -use parquet2::encoding::hybrid_rle::HybridRleDecoder; -use parquet2::encoding::Encoding; -use parquet2::page::{DataPage, DictPage, Page}; -use parquet2::schema::Repetition; use super::utils::{ self, dict_indices_decoder, extend_from_decoder, get_selected_rows, DecodedState, Decoder, FilteredOptionalPageValidity, MaybeNext, OptionalPageValidity, }; use super::Pages; +use crate::parquet::deserialize::SliceFilteredIter; +use crate::parquet::encoding::hybrid_rle::HybridRleDecoder; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::{DataPage, DictPage, Page}; +use crate::parquet::schema::Repetition; // The state of a `DataPage` of `Primitive` parquet primitive type 
#[derive(Debug)] diff --git a/crates/polars-parquet/src/arrow/read/deserialize/dictionary/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/dictionary/nested.rs index 03aded4b2b97..7da9ff48314e 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/dictionary/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/dictionary/nested.rs @@ -3,16 +3,16 @@ use std::collections::VecDeque; use arrow::array::{Array, DictionaryArray, DictionaryKey}; use arrow::bitmap::MutableBitmap; use arrow::datatypes::DataType; -use parquet2::encoding::hybrid_rle::HybridRleDecoder; -use parquet2::encoding::Encoding; -use parquet2::page::{DataPage, DictPage, Page}; -use parquet2::schema::Repetition; use polars_error::{polars_err, PolarsResult}; use super::super::super::Pages; use super::super::nested_utils::*; use super::super::utils::{dict_indices_decoder, not_implemented, MaybeNext, PageState}; use super::finish_key; +use crate::parquet::encoding::hybrid_rle::HybridRleDecoder; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::{DataPage, DictPage, Page}; +use crate::parquet::schema::Repetition; // The state of a required DataPage with a boolean physical type #[derive(Debug)] diff --git a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/basic.rs b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/basic.rs index e51122eec41e..1d7a0bf8dc68 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/basic.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/basic.rs @@ -3,10 +3,6 @@ use std::collections::VecDeque; use arrow::array::FixedSizeBinaryArray; use arrow::bitmap::MutableBitmap; use arrow::datatypes::DataType; -use parquet2::deserialize::SliceFilteredIter; -use parquet2::encoding::{hybrid_rle, Encoding}; -use parquet2::page::{split_buffer, DataPage, DictPage}; -use parquet2::schema::Repetition; use polars_error::PolarsResult; use super::super::utils::{ @@ -16,6 +12,10 @@ use super::super::utils::{ }; use super::super::Pages; use super::utils::FixedSizeBinary; +use crate::parquet::deserialize::SliceFilteredIter; +use crate::parquet::encoding::{hybrid_rle, Encoding}; +use crate::parquet::page::{split_buffer, DataPage, DictPage}; +use crate::parquet::schema::Repetition; pub(super) type Dict = Vec; diff --git a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/dictionary.rs b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/dictionary.rs index 346f092fab84..f092e94314ef 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/dictionary.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/dictionary.rs @@ -3,13 +3,13 @@ use std::collections::VecDeque; use arrow::array::{Array, DictionaryArray, DictionaryKey, FixedSizeBinaryArray}; use arrow::bitmap::MutableBitmap; use arrow::datatypes::DataType; -use parquet2::page::DictPage; use polars_error::PolarsResult; use super::super::dictionary::*; use super::super::utils::MaybeNext; use super::super::Pages; use crate::arrow::read::deserialize::nested_utils::{InitNested, NestedState}; +use crate::parquet::page::DictPage; /// An iterator adapter over [`Pages`] assumed to be encoded as parquet's dictionary-encoded binary representation #[derive(Debug)] diff --git a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/nested.rs index 11a1e3f044a7..54ef413363a8 100644 --- 
a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/nested.rs @@ -3,9 +3,6 @@ use std::collections::VecDeque; use arrow::array::FixedSizeBinaryArray; use arrow::bitmap::MutableBitmap; use arrow::datatypes::DataType; -use parquet2::encoding::Encoding; -use parquet2::page::{DataPage, DictPage}; -use parquet2::schema::Repetition; use polars_error::PolarsResult; use super::super::utils::{not_implemented, MaybeNext, PageState}; @@ -16,6 +13,9 @@ use crate::arrow::read::deserialize::fixed_size_binary::basic::{ use crate::arrow::read::deserialize::nested_utils::{next, NestedDecoder}; use crate::arrow::read::deserialize::utils::Pushable; use crate::arrow::read::{InitNested, NestedState, Pages}; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::{DataPage, DictPage}; +use crate::parquet::schema::Repetition; #[derive(Debug)] enum State<'a> { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/mod.rs b/crates/polars-parquet/src/arrow/read/deserialize/mod.rs index 3b8281373c96..8e43c0e100c1 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/mod.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/mod.rs @@ -14,13 +14,13 @@ mod utils; use arrow::array::{Array, DictionaryKey, FixedSizeListArray, ListArray, MapArray}; use arrow::datatypes::{DataType, Field, IntervalUnit}; use arrow::offset::Offsets; -use parquet2::read::get_page_iterator as _get_page_iterator; -use parquet2::schema::types::PrimitiveType; use simple::page_iter_to_arrays; pub use self::nested_utils::{init_nested, InitNested, NestedArrayIter, NestedState}; pub use self::struct_::StructIterator; use super::*; +use crate::parquet::read::get_page_iterator as _get_page_iterator; +use crate::parquet::schema::types::PrimitiveType; /// Creates a new iterator of compressed pages. 
pub fn get_page_iterator( diff --git a/crates/polars-parquet/src/arrow/read/deserialize/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/nested.rs index 6741c05ee852..3d768c8c8745 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/nested.rs @@ -2,11 +2,11 @@ use arrow::array::PrimitiveArray; use arrow::datatypes::{DataType, Field}; use arrow::match_integer_type; use ethnum::I256; -use parquet2::schema::types::PrimitiveType; use polars_error::polars_bail; use super::nested_utils::{InitNested, NestedArrayIter}; use super::*; +use crate::parquet::schema::types::PrimitiveType; /// Converts an iterator of arrays to a trait object returning trait objects #[inline] diff --git a/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs b/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs index f27841f7f5b4..da88b18a9731 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs @@ -2,14 +2,14 @@ use std::collections::VecDeque; use arrow::array::Array; use arrow::bitmap::MutableBitmap; -use parquet2::encoding::hybrid_rle::HybridRleDecoder; -use parquet2::page::{split_buffer, DataPage, DictPage, Page}; -use parquet2::read::levels::get_bit_width; use polars_error::PolarsResult; use super::super::Pages; pub use super::utils::Zip; use super::utils::{DecodedState, MaybeNext, PageState}; +use crate::parquet::encoding::hybrid_rle::HybridRleDecoder; +use crate::parquet::page::{split_buffer, DataPage, DictPage, Page}; +use crate::parquet::read::levels::get_bit_width; /// trait describing deserialized repetition and definition levels pub trait Nested: std::fmt::Debug + Send + Sync { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/null/mod.rs b/crates/polars-parquet/src/arrow/read/deserialize/null/mod.rs index ad6227a45f2e..b65ccae1c75f 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/null/mod.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/null/mod.rs @@ -3,9 +3,9 @@ mod nested; use arrow::array::NullArray; use arrow::datatypes::DataType; pub(super) use nested::NestedIter; -use parquet2::page::Page; use super::super::{ArrayIter, Pages}; +use crate::parquet::page::Page; /// Converts [`Pages`] to an [`ArrayIter`] pub fn iter_to_arrays<'a, I>( @@ -56,14 +56,14 @@ where mod tests { use arrow::array::NullArray; use arrow::datatypes::DataType; - use parquet2::encoding::Encoding; - use parquet2::error::Error as ParquetError; - use parquet2::metadata::Descriptor; - use parquet2::page::{DataPage, DataPageHeader, DataPageHeaderV1, Page}; - use parquet2::schema::types::{PhysicalType, PrimitiveType}; use polars_error::*; use super::iter_to_arrays; + use crate::parquet::encoding::Encoding; + use crate::parquet::error::Error as ParquetError; + use crate::parquet::metadata::Descriptor; + use crate::parquet::page::{DataPage, DataPageHeader, DataPageHeaderV1, Page}; + use crate::parquet::schema::types::{PhysicalType, PrimitiveType}; #[test] fn limit() { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/null/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/null/nested.rs index 0da98d13f17d..ccd3b160b674 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/null/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/null/nested.rs @@ -2,12 +2,12 @@ use std::collections::VecDeque; use arrow::array::NullArray; use arrow::datatypes::DataType; -use 
parquet2::page::{DataPage, DictPage}; use polars_error::PolarsResult; use super::super::nested_utils::*; use super::super::{utils, Pages}; use crate::arrow::read::deserialize::utils::DecodedState; +use crate::parquet::page::{DataPage, DictPage}; impl<'a> utils::PageState<'a> for usize { fn len(&self) -> usize { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/basic.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/basic.rs index acd6ea5ae785..a918b578cd85 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/basic.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/basic.rs @@ -4,15 +4,15 @@ use arrow::array::MutablePrimitiveArray; use arrow::bitmap::MutableBitmap; use arrow::datatypes::DataType; use arrow::types::NativeType; -use parquet2::deserialize::SliceFilteredIter; -use parquet2::encoding::{hybrid_rle, Encoding}; -use parquet2::page::{split_buffer, DataPage, DictPage}; -use parquet2::schema::Repetition; -use parquet2::types::{decode, NativeType as ParquetNativeType}; use polars_error::PolarsResult; use super::super::utils::{get_selected_rows, FilteredOptionalPageValidity, OptionalPageValidity}; use super::super::{utils, Pages}; +use crate::parquet::deserialize::SliceFilteredIter; +use crate::parquet::encoding::{hybrid_rle, Encoding}; +use crate::parquet::page::{split_buffer, DataPage, DictPage}; +use crate::parquet::schema::Repetition; +use crate::parquet::types::{decode, NativeType as ParquetNativeType}; #[derive(Debug)] pub(super) struct FilteredRequiredValues<'a> { diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/dictionary.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/dictionary.rs index 6f476cbafe79..3996f388f31b 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/dictionary.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/dictionary.rs @@ -4,8 +4,6 @@ use arrow::array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray}; use arrow::bitmap::MutableBitmap; use arrow::datatypes::DataType; use arrow::types::NativeType; -use parquet2::page::DictPage; -use parquet2::types::NativeType as ParquetNativeType; use polars_error::PolarsResult; use super::super::dictionary::{nested_next_dict, *}; @@ -13,6 +11,8 @@ use super::super::nested_utils::{InitNested, NestedState}; use super::super::utils::MaybeNext; use super::super::Pages; use super::basic::deserialize_plain; +use crate::parquet::page::DictPage; +use crate::parquet::types::NativeType as ParquetNativeType; fn read_dict(data_type: DataType, op: F, dict: &DictPage) -> Box where diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs index 8472a54bda3d..973b70537b37 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs @@ -5,12 +5,6 @@ use arrow::bitmap::MutableBitmap; use arrow::datatypes::DataType; use arrow::types::NativeType; use num_traits::AsPrimitive; -use parquet2::deserialize::SliceFilteredIter; -use parquet2::encoding::delta_bitpacked::Decoder; -use parquet2::encoding::Encoding; -use parquet2::page::{split_buffer, DataPage, DictPage}; -use parquet2::schema::Repetition; -use parquet2::types::NativeType as ParquetNativeType; use polars_error::{to_compute_err, PolarsResult}; use super::super::{utils, Pages}; @@ -18,6 +12,12 @@ use super::basic::{finish, PrimitiveDecoder, State 
as PrimitiveState}; use crate::arrow::read::deserialize::utils::{ get_selected_rows, FilteredOptionalPageValidity, OptionalPageValidity, }; +use crate::parquet::deserialize::SliceFilteredIter; +use crate::parquet::encoding::delta_bitpacked::Decoder; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::{split_buffer, DataPage, DictPage}; +use crate::parquet::schema::Repetition; +use crate::parquet::types::NativeType as ParquetNativeType; /// The state of a [`DataPage`] of an integer parquet type (i32 or i64) #[derive(Debug)] diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/nested.rs index 3b87d3fcde92..11b59b70ffbd 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/nested.rs @@ -4,15 +4,15 @@ use arrow::array::PrimitiveArray; use arrow::bitmap::MutableBitmap; use arrow::datatypes::DataType; use arrow::types::NativeType; -use parquet2::encoding::Encoding; -use parquet2::page::{DataPage, DictPage}; -use parquet2::schema::Repetition; -use parquet2::types::{decode, NativeType as ParquetNativeType}; use polars_error::PolarsResult; use super::super::nested_utils::*; use super::super::{utils, Pages}; use super::basic::{deserialize_plain, Values, ValuesDictionary}; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::{DataPage, DictPage}; +use crate::parquet::schema::Repetition; +use crate::parquet::types::{decode, NativeType as ParquetNativeType}; // The state of a `DataPage` of `Primitive` parquet primitive type #[allow(clippy::large_enum_variant)] diff --git a/crates/polars-parquet/src/arrow/read/deserialize/simple.rs b/crates/polars-parquet/src/arrow/read/deserialize/simple.rs index 119b569116f5..60171432ec98 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/simple.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/simple.rs @@ -3,14 +3,14 @@ use arrow::datatypes::{DataType, IntervalUnit, TimeUnit}; use arrow::match_integer_type; use arrow::types::{days_ms, i256, NativeType}; use ethnum::I256; -use parquet2::schema::types::{ - PhysicalType, PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit, -}; -use parquet2::types::int96_to_i64_ns; use polars_error::{polars_bail, PolarsResult}; use super::super::{ArrayIter, Pages}; use super::{binary, boolean, fixed_size_binary, null, primitive}; +use crate::parquet::schema::types::{ + PhysicalType, PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit, +}; +use crate::parquet::types::int96_to_i64_ns; /// Converts an iterator of arrays to a trait object returning trait objects #[inline] diff --git a/crates/polars-parquet/src/arrow/read/deserialize/utils.rs b/crates/polars-parquet/src/arrow/read/deserialize/utils.rs index 8e3eb98147c9..767180ed0db4 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/utils.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/utils.rs @@ -2,16 +2,16 @@ use std::collections::VecDeque; use arrow::bitmap::utils::BitmapIter; use arrow::bitmap::MutableBitmap; -use parquet2::deserialize::{ - FilteredHybridEncoded, FilteredHybridRleDecoderIter, HybridDecoderBitmapIter, HybridEncoded, -}; -use parquet2::encoding::hybrid_rle; -use parquet2::indexes::Interval; -use parquet2::page::{split_buffer, DataPage, DictPage, Page}; -use parquet2::schema::Repetition; use polars_error::{polars_err, to_compute_err, PolarsError, PolarsResult}; use super::super::Pages; +use 
crate::parquet::deserialize::{ + FilteredHybridEncoded, FilteredHybridRleDecoderIter, HybridDecoderBitmapIter, HybridEncoded, +}; +use crate::parquet::encoding::hybrid_rle; +use crate::parquet::indexes::Interval; +use crate::parquet::page::{split_buffer, DataPage, DictPage, Page}; +use crate::parquet::schema::Repetition; pub fn not_implemented(page: &DataPage) -> PolarsError { let is_optional = page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; diff --git a/crates/polars-parquet/src/arrow/read/file.rs b/crates/polars-parquet/src/arrow/read/file.rs index 109011175c7b..0d026e4d74a9 100644 --- a/crates/polars-parquet/src/arrow/read/file.rs +++ b/crates/polars-parquet/src/arrow/read/file.rs @@ -3,11 +3,11 @@ use std::io::{Read, Seek}; use arrow::array::Array; use arrow::chunk::Chunk; use arrow::datatypes::Schema; -use parquet2::indexes::FilteredPage; use polars_error::PolarsResult; use super::{RowGroupDeserializer, RowGroupMetaData}; use crate::arrow::read::read_columns_many; +use crate::parquet::indexes::FilteredPage; /// An iterator of [`Chunk`]s coming from row groups of a parquet file. /// diff --git a/crates/polars-parquet/src/arrow/read/indexes/binary.rs b/crates/polars-parquet/src/arrow/read/indexes/binary.rs index 83de6a6f525a..5e115feb4253 100644 --- a/crates/polars-parquet/src/arrow/read/indexes/binary.rs +++ b/crates/polars-parquet/src/arrow/read/indexes/binary.rs @@ -1,10 +1,10 @@ use arrow::array::{Array, BinaryArray, PrimitiveArray, Utf8Array}; use arrow::datatypes::{DataType, PhysicalType}; use arrow::trusted_len::TrustedLen; -use parquet2::indexes::PageIndex; use polars_error::{to_compute_err, PolarsResult}; use super::ColumnPageStatistics; +use crate::parquet::indexes::PageIndex; pub fn deserialize( indexes: &[PageIndex>], diff --git a/crates/polars-parquet/src/arrow/read/indexes/boolean.rs b/crates/polars-parquet/src/arrow/read/indexes/boolean.rs index 5c809673eba1..b6414e24a621 100644 --- a/crates/polars-parquet/src/arrow/read/indexes/boolean.rs +++ b/crates/polars-parquet/src/arrow/read/indexes/boolean.rs @@ -1,7 +1,7 @@ use arrow::array::{BooleanArray, PrimitiveArray}; -use parquet2::indexes::PageIndex; use super::ColumnPageStatistics; +use crate::parquet::indexes::PageIndex; pub fn deserialize(indexes: &[PageIndex]) -> ColumnPageStatistics { ColumnPageStatistics { diff --git a/crates/polars-parquet/src/arrow/read/indexes/fixed_len_binary.rs b/crates/polars-parquet/src/arrow/read/indexes/fixed_len_binary.rs index c6cede8dd466..1a99e0b3e73a 100644 --- a/crates/polars-parquet/src/arrow/read/indexes/fixed_len_binary.rs +++ b/crates/polars-parquet/src/arrow/read/indexes/fixed_len_binary.rs @@ -2,9 +2,9 @@ use arrow::array::{Array, FixedSizeBinaryArray, MutableFixedSizeBinaryArray, Pri use arrow::datatypes::{DataType, PhysicalType, PrimitiveType}; use arrow::trusted_len::TrustedLen; use arrow::types::{i256, NativeType}; -use parquet2::indexes::PageIndex; use super::ColumnPageStatistics; +use crate::parquet::indexes::PageIndex; pub fn deserialize(indexes: &[PageIndex>], data_type: DataType) -> ColumnPageStatistics { ColumnPageStatistics { diff --git a/crates/polars-parquet/src/arrow/read/indexes/mod.rs b/crates/polars-parquet/src/arrow/read/indexes/mod.rs index 1abd34c5a968..60be17db158f 100644 --- a/crates/polars-parquet/src/arrow/read/indexes/mod.rs +++ b/crates/polars-parquet/src/arrow/read/indexes/mod.rs @@ -1,12 +1,12 @@ //! 
API to perform page-level filtering (also known as indexes) -use parquet2::error::Error as ParquetError; -use parquet2::indexes::{ +use crate::parquet::error::Error as ParquetError; +use crate::parquet::indexes::{ select_pages, BooleanIndex, ByteIndex, FixedLenByteIndex, Index as ParquetIndex, NativeIndex, PageLocation, }; -use parquet2::metadata::{ColumnChunkMetaData, RowGroupMetaData}; -use parquet2::read::{read_columns_indexes as _read_columns_indexes, read_pages_locations}; -use parquet2::schema::types::PhysicalType as ParquetPhysicalType; +use crate::parquet::metadata::{ColumnChunkMetaData, RowGroupMetaData}; +use crate::parquet::read::{read_columns_indexes as _read_columns_indexes, read_pages_locations}; +use crate::parquet::schema::types::PhysicalType as ParquetPhysicalType; mod binary; mod boolean; @@ -18,10 +18,10 @@ use std::io::{Read, Seek}; use arrow::array::{Array, UInt64Array}; use arrow::datatypes::{DataType, Field, PhysicalType, PrimitiveType}; -pub use parquet2::indexes::{FilteredPage, Interval}; use polars_error::{polars_bail, PolarsResult}; use super::get_field_pages; +pub use crate::parquet::indexes::{FilteredPage, Interval}; /// Page statistics of an Arrow field. #[derive(Debug, PartialEq)] @@ -83,7 +83,7 @@ fn deserialize( let index = index.as_any().downcast_ref::>().unwrap(); Ok(primitive::deserialize_i32(&index.indexes, data_type).into()) }, - parquet2::schema::types::PhysicalType::Int64 => { + crate::parquet::schema::types::PhysicalType::Int64 => { let index = index.as_any().downcast_ref::>().unwrap(); Ok( primitive::deserialize_i64( @@ -94,7 +94,7 @@ fn deserialize( .into(), ) }, - parquet2::schema::types::PhysicalType::FixedLenByteArray(_) => { + crate::parquet::schema::types::PhysicalType::FixedLenByteArray(_) => { let index = index.as_any().downcast_ref::().unwrap(); Ok(fixed_len_binary::deserialize(&index.indexes, data_type).into()) }, @@ -108,7 +108,7 @@ fn deserialize( let index = index.as_any().downcast_ref::>().unwrap(); Ok(primitive::deserialize_i32(&index.indexes, data_type).into()) }, - parquet2::schema::types::PhysicalType::Int64 => { + crate::parquet::schema::types::PhysicalType::Int64 => { let index = index.as_any().downcast_ref::>().unwrap(); Ok( primitive::deserialize_i64( @@ -119,7 +119,7 @@ fn deserialize( .into(), ) }, - parquet2::schema::types::PhysicalType::FixedLenByteArray(_) => { + crate::parquet::schema::types::PhysicalType::FixedLenByteArray(_) => { let index = index.as_any().downcast_ref::().unwrap(); Ok(fixed_len_binary::deserialize(&index.indexes, data_type).into()) }, @@ -153,7 +153,7 @@ fn deserialize( .into(), ) }, - parquet2::schema::types::PhysicalType::Int96 => { + crate::parquet::schema::types::PhysicalType::Int96 => { let index = index .as_any() .downcast_ref::>() diff --git a/crates/polars-parquet/src/arrow/read/indexes/primitive.rs b/crates/polars-parquet/src/arrow/read/indexes/primitive.rs index fd551c35a2b0..c2aba43f07cc 100644 --- a/crates/polars-parquet/src/arrow/read/indexes/primitive.rs +++ b/crates/polars-parquet/src/arrow/read/indexes/primitive.rs @@ -3,11 +3,13 @@ use arrow::datatypes::{DataType, TimeUnit}; use arrow::trusted_len::TrustedLen; use arrow::types::{i256, NativeType}; use ethnum::I256; -use parquet2::indexes::PageIndex; -use parquet2::schema::types::{PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit}; -use parquet2::types::int96_to_i64_ns; use super::ColumnPageStatistics; +use crate::parquet::indexes::PageIndex; +use crate::parquet::schema::types::{ + PrimitiveLogicalType, PrimitiveType, 
TimeUnit as ParquetTimeUnit, +}; +use crate::parquet::types::int96_to_i64_ns; #[inline] fn deserialize_int32>>( diff --git a/crates/polars-parquet/src/arrow/read/mod.rs b/crates/polars-parquet/src/arrow/read/mod.rs index 1a8cdc4c05b7..02d1336d11fc 100644 --- a/crates/polars-parquet/src/arrow/read/mod.rs +++ b/crates/polars-parquet/src/arrow/read/mod.rs @@ -19,17 +19,22 @@ pub use deserialize::{ pub use file::{FileReader, RowGroupReader}; #[cfg(feature = "async")] use futures::{AsyncRead, AsyncSeek}; -// re-exports of parquet2's relevant APIs -pub use parquet2::{ +use polars_error::PolarsResult; +pub use row_group::*; +pub use schema::{infer_schema, FileMetaData}; + +#[cfg(feature = "async")] +pub use crate::parquet::read::{get_page_stream, read_metadata_async as _read_metadata_async}; +// re-exports of crate::parquet's relevant APIs +pub use crate::parquet::{ error::Error as ParquetError, fallible_streaming_iterator, metadata::{ColumnChunkMetaData, ColumnDescriptor, RowGroupMetaData}, page::{CompressedDataPage, DataPageHeader, Page}, read::{ - decompress, get_column_iterator, get_page_stream, - read_columns_indexes as _read_columns_indexes, read_metadata as _read_metadata, - read_metadata_async as _read_metadata_async, read_pages_locations, BasicDecompressor, - Decompressor, MutStreamingIterator, PageFilter, PageReader, ReadColumnIterator, State, + decompress, get_column_iterator, read_columns_indexes as _read_columns_indexes, + read_metadata as _read_metadata, read_pages_locations, BasicDecompressor, Decompressor, + MutStreamingIterator, PageFilter, PageReader, ReadColumnIterator, State, }, schema::types::{ GroupLogicalType, ParquetType, PhysicalType, PrimitiveConvertedType, PrimitiveLogicalType, @@ -38,9 +43,6 @@ pub use parquet2::{ types::int96_to_i64_ns, FallibleStreamingIterator, }; -use polars_error::PolarsResult; -pub use row_group::*; -pub use schema::{infer_schema, FileMetaData}; /// Trait describing a [`FallibleStreamingIterator`] of [`Page`] pub trait Pages: diff --git a/crates/polars-parquet/src/arrow/read/row_group.rs b/crates/polars-parquet/src/arrow/read/row_group.rs index 24c7c9c64d40..a988a6d8c562 100644 --- a/crates/polars-parquet/src/arrow/read/row_group.rs +++ b/crates/polars-parquet/src/arrow/read/row_group.rs @@ -3,13 +3,13 @@ use std::io::{Read, Seek}; use arrow::array::Array; use arrow::chunk::Chunk; use arrow::datatypes::Field; -use parquet2::indexes::FilteredPage; -use parquet2::metadata::ColumnChunkMetaData; -use parquet2::read::{BasicDecompressor, IndexedPageReader, PageMetaData, PageReader}; use polars_error::PolarsResult; use super::{ArrayIter, RowGroupMetaData}; use crate::arrow::read::column_iter_to_arrays; +use crate::parquet::indexes::FilteredPage; +use crate::parquet::metadata::ColumnChunkMetaData; +use crate::parquet::read::{BasicDecompressor, IndexedPageReader, PageMetaData, PageReader}; /// An [`Iterator`] of [`Chunk`] that (dynamically) adapts a vector of iterators of [`Array`] into /// an iterator of [`Chunk`]. @@ -132,8 +132,12 @@ where } type Pages = Box< - dyn Iterator> - + Sync + dyn Iterator< + Item = std::result::Result< + crate::parquet::page::CompressedPage, + crate::parquet::error::Error, + >, + > + Sync + Send, >; diff --git a/crates/polars-parquet/src/arrow/read/schema/convert.rs b/crates/polars-parquet/src/arrow/read/schema/convert.rs index 549eaf654d1d..3f1e961c0dd4 100644 --- a/crates/polars-parquet/src/arrow/read/schema/convert.rs +++ b/crates/polars-parquet/src/arrow/read/schema/convert.rs @@ -1,12 +1,12 @@ //! 
This module has entry points, [`parquet_to_arrow_schema`] and the more configurable [`parquet_to_arrow_schema_with_options`]. use arrow::datatypes::{DataType, Field, IntervalUnit, TimeUnit}; -use parquet2::schema::types::{ + +use crate::arrow::read::schema::SchemaInferenceOptions; +use crate::parquet::schema::types::{ FieldInfo, GroupConvertedType, GroupLogicalType, IntegerType, ParquetType, PhysicalType, PrimitiveConvertedType, PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit, }; -use parquet2::schema::Repetition; - -use crate::arrow::read::schema::SchemaInferenceOptions; +use crate::parquet::schema::Repetition; /// Converts [`ParquetType`]s to a [`Field`], ignoring parquet fields that do not contain /// any physical column. @@ -399,10 +399,10 @@ pub(crate) fn to_data_type( #[cfg(test)] mod tests { use arrow::datatypes::{DataType, Field, TimeUnit}; - use parquet2::metadata::SchemaDescriptor; use polars_error::*; use super::*; + use crate::parquet::metadata::SchemaDescriptor; #[test] fn test_flat_primitives() -> PolarsResult<()> { diff --git a/crates/polars-parquet/src/arrow/read/schema/metadata.rs b/crates/polars-parquet/src/arrow/read/schema/metadata.rs index c3056cd63597..557f4fedfab4 100644 --- a/crates/polars-parquet/src/arrow/read/schema/metadata.rs +++ b/crates/polars-parquet/src/arrow/read/schema/metadata.rs @@ -2,10 +2,10 @@ use arrow::datatypes::{Metadata, Schema}; use arrow::io::ipc::read::deserialize_schema; use base64::engine::general_purpose; use base64::Engine as _; -pub use parquet2::metadata::KeyValue; use polars_error::{polars_bail, PolarsResult}; use super::super::super::ARROW_SCHEMA_META_KEY; +pub use crate::parquet::metadata::KeyValue; /// Reads an arrow schema from Parquet's file metadata. Returns `None` if no schema was found. 
/// # Errors diff --git a/crates/polars-parquet/src/arrow/read/schema/mod.rs b/crates/polars-parquet/src/arrow/read/schema/mod.rs index 2d4c8b5da54f..a5242105574d 100644 --- a/crates/polars-parquet/src/arrow/read/schema/mod.rs +++ b/crates/polars-parquet/src/arrow/read/schema/mod.rs @@ -7,11 +7,11 @@ mod metadata; pub(crate) use convert::*; pub use convert::{parquet_to_arrow_schema, parquet_to_arrow_schema_with_options}; pub use metadata::read_schema_from_metadata; -pub use parquet2::metadata::{FileMetaData, KeyValue, SchemaDescriptor}; -pub use parquet2::schema::types::ParquetType; use polars_error::PolarsResult; use self::metadata::parse_key_value_metadata; +pub use crate::parquet::metadata::{FileMetaData, KeyValue, SchemaDescriptor}; +pub use crate::parquet::schema::types::ParquetType; /// Options when inferring schemas from Parquet pub struct SchemaInferenceOptions { diff --git a/crates/polars-parquet/src/arrow/read/statistics/binary.rs b/crates/polars-parquet/src/arrow/read/statistics/binary.rs index 925d81176e2b..7931cec42c1c 100644 --- a/crates/polars-parquet/src/arrow/read/statistics/binary.rs +++ b/crates/polars-parquet/src/arrow/read/statistics/binary.rs @@ -1,8 +1,9 @@ use arrow::array::{MutableArray, MutableBinaryArray}; use arrow::offset::Offset; -use parquet2::statistics::{BinaryStatistics, Statistics as ParquetStatistics}; use polars_error::PolarsResult; +use crate::parquet::statistics::{BinaryStatistics, Statistics as ParquetStatistics}; + pub(super) fn push( from: Option<&dyn ParquetStatistics>, min: &mut dyn MutableArray, diff --git a/crates/polars-parquet/src/arrow/read/statistics/boolean.rs b/crates/polars-parquet/src/arrow/read/statistics/boolean.rs index 23a5504124ce..07a823f3cf2f 100644 --- a/crates/polars-parquet/src/arrow/read/statistics/boolean.rs +++ b/crates/polars-parquet/src/arrow/read/statistics/boolean.rs @@ -1,7 +1,8 @@ use arrow::array::{MutableArray, MutableBooleanArray}; -use parquet2::statistics::{BooleanStatistics, Statistics as ParquetStatistics}; use polars_error::PolarsResult; +use crate::parquet::statistics::{BooleanStatistics, Statistics as ParquetStatistics}; + pub(super) fn push( from: Option<&dyn ParquetStatistics>, min: &mut dyn MutableArray, diff --git a/crates/polars-parquet/src/arrow/read/statistics/fixlen.rs b/crates/polars-parquet/src/arrow/read/statistics/fixlen.rs index b5e03eaa38f2..8758ce3c05e4 100644 --- a/crates/polars-parquet/src/arrow/read/statistics/fixlen.rs +++ b/crates/polars-parquet/src/arrow/read/statistics/fixlen.rs @@ -1,11 +1,11 @@ use arrow::array::*; use arrow::types::{days_ms, i256}; use ethnum::I256; -use parquet2::statistics::{FixedLenStatistics, Statistics as ParquetStatistics}; use polars_error::PolarsResult; use super::super::{convert_days_ms, convert_i128}; use crate::arrow::read::convert_i256; +use crate::parquet::statistics::{FixedLenStatistics, Statistics as ParquetStatistics}; pub(super) fn push_i128( from: Option<&dyn ParquetStatistics>, diff --git a/crates/polars-parquet/src/arrow/read/statistics/mod.rs b/crates/polars-parquet/src/arrow/read/statistics/mod.rs index ada51dbc2d39..0dcf04105ca7 100644 --- a/crates/polars-parquet/src/arrow/read/statistics/mod.rs +++ b/crates/polars-parquet/src/arrow/read/statistics/mod.rs @@ -1,4 +1,4 @@ -//! APIs exposing `parquet2`'s statistics as arrow's statistics. +//! APIs exposing `crate::parquet`'s statistics as arrow's statistics. 
use std::collections::VecDeque; use std::sync::Arc; @@ -7,16 +7,17 @@ use arrow::datatypes::{DataType, Field, IntervalUnit, PhysicalType}; use arrow::types::i256; use arrow::with_match_primitive_type; use ethnum::I256; -use parquet2::metadata::RowGroupMetaData; -use parquet2::schema::types::{ +use polars_error::{polars_bail, PolarsResult}; + +use crate::parquet::metadata::RowGroupMetaData; +use crate::parquet::schema::types::{ PhysicalType as ParquetPhysicalType, PrimitiveType as ParquetPrimitiveType, }; -use parquet2::statistics::{ +use crate::parquet::statistics::{ BinaryStatistics, BooleanStatistics, FixedLenStatistics, PrimitiveStatistics, Statistics as ParquetStatistics, }; -use parquet2::types::int96_to_i64_ns; -use polars_error::{polars_bail, PolarsResult}; +use crate::parquet::types::int96_to_i64_ns; mod binary; mod boolean; diff --git a/crates/polars-parquet/src/arrow/read/statistics/primitive.rs b/crates/polars-parquet/src/arrow/read/statistics/primitive.rs index ecfa2e18972f..e6a48ed67628 100644 --- a/crates/polars-parquet/src/arrow/read/statistics/primitive.rs +++ b/crates/polars-parquet/src/arrow/read/statistics/primitive.rs @@ -1,11 +1,12 @@ use arrow::array::*; use arrow::datatypes::TimeUnit; use arrow::types::NativeType; -use parquet2::schema::types::{PrimitiveLogicalType, TimeUnit as ParquetTimeUnit}; -use parquet2::statistics::{PrimitiveStatistics, Statistics as ParquetStatistics}; -use parquet2::types::NativeType as ParquetNativeType; use polars_error::PolarsResult; +use crate::parquet::schema::types::{PrimitiveLogicalType, TimeUnit as ParquetTimeUnit}; +use crate::parquet::statistics::{PrimitiveStatistics, Statistics as ParquetStatistics}; +use crate::parquet::types::NativeType as ParquetNativeType; + pub fn timestamp(logical_type: Option<&PrimitiveLogicalType>, time_unit: TimeUnit, x: i64) -> i64 { let unit = if let Some(PrimitiveLogicalType::Timestamp { unit, .. 
}) = logical_type { unit diff --git a/crates/polars-parquet/src/arrow/read/statistics/utf8.rs b/crates/polars-parquet/src/arrow/read/statistics/utf8.rs index a716e8d22b8a..b12d6b7b11a9 100644 --- a/crates/polars-parquet/src/arrow/read/statistics/utf8.rs +++ b/crates/polars-parquet/src/arrow/read/statistics/utf8.rs @@ -1,8 +1,9 @@ use arrow::array::{MutableArray, MutableUtf8Array}; use arrow::offset::Offset; -use parquet2::statistics::{BinaryStatistics, Statistics as ParquetStatistics}; use polars_error::PolarsResult; +use crate::parquet::statistics::{BinaryStatistics, Statistics as ParquetStatistics}; + pub(super) fn push( from: Option<&dyn ParquetStatistics>, min: &mut dyn MutableArray, diff --git a/crates/polars-parquet/src/arrow/write/binary/basic.rs b/crates/polars-parquet/src/arrow/write/binary/basic.rs index ee2cb022e699..3390a90c43e3 100644 --- a/crates/polars-parquet/src/arrow/write/binary/basic.rs +++ b/crates/polars-parquet/src/arrow/write/binary/basic.rs @@ -1,14 +1,16 @@ use arrow::array::{Array, BinaryArray}; use arrow::bitmap::Bitmap; use arrow::offset::Offset; -use parquet2::encoding::{delta_bitpacked, Encoding}; -use parquet2::page::DataPage; -use parquet2::schema::types::PrimitiveType; -use parquet2::statistics::{serialize_statistics, BinaryStatistics, ParquetStatistics, Statistics}; use polars_error::{polars_bail, PolarsResult}; use super::super::{utils, WriteOptions}; use crate::arrow::read::schema::is_nullable; +use crate::parquet::encoding::{delta_bitpacked, Encoding}; +use crate::parquet::page::DataPage; +use crate::parquet::schema::types::PrimitiveType; +use crate::parquet::statistics::{ + serialize_statistics, BinaryStatistics, ParquetStatistics, Statistics, +}; pub(crate) fn encode_plain( array: &BinaryArray, diff --git a/crates/polars-parquet/src/arrow/write/binary/nested.rs b/crates/polars-parquet/src/arrow/write/binary/nested.rs index d72917df942d..3b20b3af4936 100644 --- a/crates/polars-parquet/src/arrow/write/binary/nested.rs +++ b/crates/polars-parquet/src/arrow/write/binary/nested.rs @@ -1,14 +1,14 @@ use arrow::array::{Array, BinaryArray}; use arrow::offset::Offset; -use parquet2::encoding::Encoding; -use parquet2::page::DataPage; -use parquet2::schema::types::PrimitiveType; use polars_error::PolarsResult; use super::super::{nested, utils, WriteOptions}; use super::basic::{build_statistics, encode_plain}; use crate::arrow::read::schema::is_nullable; use crate::arrow::write::Nested; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::DataPage; +use crate::parquet::schema::types::PrimitiveType; pub fn array_to_page( array: &BinaryArray, diff --git a/crates/polars-parquet/src/arrow/write/boolean/basic.rs b/crates/polars-parquet/src/arrow/write/boolean/basic.rs index c18a9b0bfb24..466039c9b55d 100644 --- a/crates/polars-parquet/src/arrow/write/boolean/basic.rs +++ b/crates/polars-parquet/src/arrow/write/boolean/basic.rs @@ -1,15 +1,15 @@ use arrow::array::*; -use parquet2::encoding::hybrid_rle::bitpacked_encode; -use parquet2::encoding::Encoding; -use parquet2::page::DataPage; -use parquet2::schema::types::PrimitiveType; -use parquet2::statistics::{ - serialize_statistics, BooleanStatistics, ParquetStatistics, Statistics, -}; use polars_error::PolarsResult; use super::super::{utils, WriteOptions}; use crate::arrow::read::schema::is_nullable; +use crate::parquet::encoding::hybrid_rle::bitpacked_encode; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::DataPage; +use crate::parquet::schema::types::PrimitiveType; +use 
crate::parquet::statistics::{ + serialize_statistics, BooleanStatistics, ParquetStatistics, Statistics, +}; fn encode(iterator: impl Iterator, buffer: &mut Vec) -> PolarsResult<()> { // encode values using bitpacking diff --git a/crates/polars-parquet/src/arrow/write/boolean/nested.rs b/crates/polars-parquet/src/arrow/write/boolean/nested.rs index 3ee9cfba328f..eb7a66cfd32c 100644 --- a/crates/polars-parquet/src/arrow/write/boolean/nested.rs +++ b/crates/polars-parquet/src/arrow/write/boolean/nested.rs @@ -1,13 +1,13 @@ use arrow::array::{Array, BooleanArray}; -use parquet2::encoding::Encoding; -use parquet2::page::DataPage; -use parquet2::schema::types::PrimitiveType; use polars_error::PolarsResult; use super::super::{nested, utils, WriteOptions}; use super::basic::{build_statistics, encode_plain}; use crate::arrow::read::schema::is_nullable; use crate::arrow::write::Nested; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::DataPage; +use crate::parquet::schema::types::PrimitiveType; pub fn array_to_page( array: &BooleanArray, diff --git a/crates/polars-parquet/src/arrow/write/dictionary.rs b/crates/polars-parquet/src/arrow/write/dictionary.rs index e996e78770d5..519e65d28ad6 100644 --- a/crates/polars-parquet/src/arrow/write/dictionary.rs +++ b/crates/polars-parquet/src/arrow/write/dictionary.rs @@ -1,12 +1,6 @@ use arrow::array::{Array, DictionaryArray, DictionaryKey}; use arrow::bitmap::{Bitmap, MutableBitmap}; use arrow::datatypes::DataType; -use parquet2::encoding::hybrid_rle::encode_u32; -use parquet2::encoding::Encoding; -use parquet2::page::{DictPage, Page}; -use parquet2::schema::types::PrimitiveType; -use parquet2::statistics::{serialize_statistics, ParquetStatistics}; -use parquet2::write::DynIter; use polars_error::{polars_bail, PolarsResult}; use super::binary::{ @@ -22,6 +16,12 @@ use super::utf8::{build_statistics as utf8_build_statistics, encode_plain as utf use super::{nested, Nested, WriteOptions}; use crate::arrow::read::schema::is_nullable; use crate::arrow::write::{slice_nested_leaf, utils}; +use crate::parquet::encoding::hybrid_rle::encode_u32; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::{DictPage, Page}; +use crate::parquet::schema::types::PrimitiveType; +use crate::parquet::statistics::{serialize_statistics, ParquetStatistics}; +use crate::parquet::write::DynIter; fn serialize_def_levels_simple( validity: Option<&Bitmap>, diff --git a/crates/polars-parquet/src/arrow/write/file.rs b/crates/polars-parquet/src/arrow/write/file.rs index b0fad55b1e1d..a44a914ad46f 100644 --- a/crates/polars-parquet/src/arrow/write/file.rs +++ b/crates/polars-parquet/src/arrow/write/file.rs @@ -1,12 +1,12 @@ use std::io::Write; use arrow::datatypes::Schema; -use parquet2::metadata::{KeyValue, SchemaDescriptor}; -use parquet2::write::{RowGroupIter, WriteOptions as FileWriteOptions}; use polars_error::{PolarsError, PolarsResult}; use super::schema::schema_to_metadata_key; use super::{to_parquet_schema, ThriftFileMetaData, WriteOptions}; +use crate::parquet::metadata::{KeyValue, SchemaDescriptor}; +use crate::parquet::write::{RowGroupIter, WriteOptions as FileWriteOptions}; /// Attaches [`Schema`] to `key_value_metadata` pub fn add_arrow_schema( @@ -23,7 +23,7 @@ pub fn add_arrow_schema( /// An interface to write a parquet to a [`Write`] pub struct FileWriter { - writer: parquet2::write::FileWriter, + writer: crate::parquet::write::FileWriter, schema: Schema, options: WriteOptions, } @@ -56,7 +56,7 @@ impl FileWriter { let created_by = Some("Arrow2 
- Native Rust implementation of Arrow".to_string()); Ok(Self { - writer: parquet2::write::FileWriter::new( + writer: crate::parquet::write::FileWriter::new( writer, parquet_schema, FileWriteOptions { diff --git a/crates/polars-parquet/src/arrow/write/fixed_len_bytes.rs b/crates/polars-parquet/src/arrow/write/fixed_len_bytes.rs index 0531f66ffa4d..c1ce9754a4ed 100644 --- a/crates/polars-parquet/src/arrow/write/fixed_len_bytes.rs +++ b/crates/polars-parquet/src/arrow/write/fixed_len_bytes.rs @@ -1,14 +1,14 @@ use arrow::array::{Array, FixedSizeBinaryArray, PrimitiveArray}; use arrow::types::i256; -use parquet2::encoding::Encoding; -use parquet2::page::DataPage; -use parquet2::schema::types::PrimitiveType; -use parquet2::statistics::{serialize_statistics, FixedLenStatistics}; use polars_error::PolarsResult; use super::binary::ord_binary; use super::{utils, WriteOptions}; use crate::arrow::read::schema::is_nullable; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::DataPage; +use crate::parquet::schema::types::PrimitiveType; +use crate::parquet::statistics::{serialize_statistics, FixedLenStatistics}; pub(crate) fn encode_plain(array: &FixedSizeBinaryArray, is_optional: bool, buffer: &mut Vec) { // append the non-null values diff --git a/crates/polars-parquet/src/arrow/write/mod.rs b/crates/polars-parquet/src/arrow/write/mod.rs index a1db4a4f4147..7276d90639dd 100644 --- a/crates/polars-parquet/src/arrow/write/mod.rs +++ b/crates/polars-parquet/src/arrow/write/mod.rs @@ -32,20 +32,23 @@ use arrow::datatypes::*; use arrow::types::{days_ms, i256, NativeType}; pub use nested::{num_values, write_rep_and_def}; pub use pages::{to_leaves, to_nested, to_parquet_leaves}; -pub use parquet2::compression::{BrotliLevel, CompressionOptions, GzipLevel, ZstdLevel}; -pub use parquet2::encoding::Encoding; -pub use parquet2::metadata::{ +pub use utils::write_def_levels; + +pub use crate::parquet::compression::{BrotliLevel, CompressionOptions, GzipLevel, ZstdLevel}; +pub use crate::parquet::encoding::Encoding; +pub use crate::parquet::metadata::{ Descriptor, FileMetaData, KeyValue, SchemaDescriptor, ThriftFileMetaData, }; -pub use parquet2::page::{CompressedDataPage, CompressedPage, Page}; -use parquet2::schema::types::PrimitiveType as ParquetPrimitiveType; -pub use parquet2::schema::types::{FieldInfo, ParquetType, PhysicalType as ParquetPhysicalType}; -pub use parquet2::write::{ +pub use crate::parquet::page::{CompressedDataPage, CompressedPage, Page}; +use crate::parquet::schema::types::PrimitiveType as ParquetPrimitiveType; +pub use crate::parquet::schema::types::{ + FieldInfo, ParquetType, PhysicalType as ParquetPhysicalType, +}; +pub use crate::parquet::write::{ compress, write_metadata_sidecar, Compressor, DynIter, DynStreamingIterator, RowGroupIter, Version, }; -pub use parquet2::{fallible_streaming_iterator, FallibleStreamingIterator}; -pub use utils::write_def_levels; +pub use crate::parquet::{fallible_streaming_iterator, FallibleStreamingIterator}; /// Currently supported options to write to parquet #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/crates/polars-parquet/src/arrow/write/nested/mod.rs b/crates/polars-parquet/src/arrow/write/nested/mod.rs index 4fed334a820f..c53d266255c5 100644 --- a/crates/polars-parquet/src/arrow/write/nested/mod.rs +++ b/crates/polars-parquet/src/arrow/write/nested/mod.rs @@ -2,13 +2,13 @@ mod def; mod rep; use arrow::offset::Offset; -use parquet2::encoding::hybrid_rle::encode_u32; -use parquet2::read::levels::get_bit_width; -use 
parquet2::write::Version; use polars_error::PolarsResult; pub use rep::num_values; use super::Nested; +use crate::parquet::encoding::hybrid_rle::encode_u32; +use crate::parquet::read::levels::get_bit_width; +use crate::parquet::write::Version; fn write_levels_v1) -> PolarsResult<()>>( buffer: &mut Vec, diff --git a/crates/polars-parquet/src/arrow/write/pages.rs b/crates/polars-parquet/src/arrow/write/pages.rs index 46698d083f96..2a3a3ac47f22 100644 --- a/crates/polars-parquet/src/arrow/write/pages.rs +++ b/crates/polars-parquet/src/arrow/write/pages.rs @@ -4,13 +4,13 @@ use arrow::array::{Array, ListArray, MapArray, StructArray}; use arrow::bitmap::Bitmap; use arrow::datatypes::PhysicalType; use arrow::offset::{Offset, OffsetsBuffer}; -use parquet2::page::Page; -use parquet2::schema::types::{ParquetType, PrimitiveType as ParquetPrimitiveType}; -use parquet2::write::DynIter; use polars_error::{polars_bail, PolarsResult}; use super::{array_to_pages, Encoding, WriteOptions}; use crate::arrow::read::schema::is_nullable; +use crate::parquet::page::Page; +use crate::parquet::schema::types::{ParquetType, PrimitiveType as ParquetPrimitiveType}; +use crate::parquet::write::DynIter; #[derive(Debug, Clone, PartialEq)] pub struct ListNested { @@ -259,11 +259,13 @@ mod tests { use arrow::array::*; use arrow::bitmap::Bitmap; use arrow::datatypes::*; - use parquet2::schema::types::{GroupLogicalType, PrimitiveConvertedType, PrimitiveLogicalType}; - use parquet2::schema::Repetition; use super::super::{FieldInfo, ParquetPhysicalType, ParquetPrimitiveType}; use super::*; + use crate::parquet::schema::types::{ + GroupLogicalType, PrimitiveConvertedType, PrimitiveLogicalType, + }; + use crate::parquet::schema::Repetition; #[test] fn test_struct() { diff --git a/crates/polars-parquet/src/arrow/write/primitive/basic.rs b/crates/polars-parquet/src/arrow/write/primitive/basic.rs index 81d7b5cba943..a83e1f22d45d 100644 --- a/crates/polars-parquet/src/arrow/write/primitive/basic.rs +++ b/crates/polars-parquet/src/arrow/write/primitive/basic.rs @@ -1,16 +1,16 @@ use arrow::array::{Array, PrimitiveArray}; use arrow::types::NativeType; -use parquet2::encoding::delta_bitpacked::encode; -use parquet2::encoding::Encoding; -use parquet2::page::DataPage; -use parquet2::schema::types::PrimitiveType; -use parquet2::statistics::{serialize_statistics, PrimitiveStatistics}; -use parquet2::types::NativeType as ParquetNativeType; use polars_error::{polars_bail, PolarsResult}; use super::super::{utils, WriteOptions}; use crate::arrow::read::schema::is_nullable; use crate::arrow::write::utils::ExactSizedIter; +use crate::parquet::encoding::delta_bitpacked::encode; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::DataPage; +use crate::parquet::schema::types::PrimitiveType; +use crate::parquet::statistics::{serialize_statistics, PrimitiveStatistics}; +use crate::parquet::types::NativeType as ParquetNativeType; pub(crate) fn encode_plain( array: &PrimitiveArray, diff --git a/crates/polars-parquet/src/arrow/write/primitive/nested.rs b/crates/polars-parquet/src/arrow/write/primitive/nested.rs index a5cb2229de6f..22f6ec7d8148 100644 --- a/crates/polars-parquet/src/arrow/write/primitive/nested.rs +++ b/crates/polars-parquet/src/arrow/write/primitive/nested.rs @@ -1,16 +1,16 @@ use arrow::array::{Array, PrimitiveArray}; use arrow::types::NativeType as ArrowNativeType; -use parquet2::encoding::Encoding; -use parquet2::page::DataPage; -use parquet2::schema::types::PrimitiveType; -use 
parquet2::statistics::serialize_statistics; -use parquet2::types::NativeType; use polars_error::PolarsResult; use super::super::{nested, utils, WriteOptions}; use super::basic::{build_statistics, encode_plain}; use crate::arrow::read::schema::is_nullable; use crate::arrow::write::Nested; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::DataPage; +use crate::parquet::schema::types::PrimitiveType; +use crate::parquet::statistics::serialize_statistics; +use crate::parquet::types::NativeType; pub fn array_to_page( array: &PrimitiveArray, diff --git a/crates/polars-parquet/src/arrow/write/row_group.rs b/crates/polars-parquet/src/arrow/write/row_group.rs index 6d2269c178b9..88ede63c3d75 100644 --- a/crates/polars-parquet/src/arrow/write/row_group.rs +++ b/crates/polars-parquet/src/arrow/write/row_group.rs @@ -1,16 +1,16 @@ use arrow::array::Array; use arrow::chunk::Chunk; use arrow::datatypes::Schema; -use parquet2::error::Error as ParquetError; -use parquet2::schema::types::ParquetType; -use parquet2::write::Compressor; -use parquet2::FallibleStreamingIterator; use polars_error::{polars_bail, to_compute_err, PolarsError, PolarsResult}; use super::{ array_to_columns, to_parquet_schema, DynIter, DynStreamingIterator, Encoding, RowGroupIter, SchemaDescriptor, WriteOptions, }; +use crate::parquet::error::Error as ParquetError; +use crate::parquet::schema::types::ParquetType; +use crate::parquet::write::Compressor; +use crate::parquet::FallibleStreamingIterator; /// Maps a [`Chunk`] and parquet-specific options to an [`RowGroupIter`] used to /// write to parquet diff --git a/crates/polars-parquet/src/arrow/write/schema.rs b/crates/polars-parquet/src/arrow/write/schema.rs index 89fa6c7ef99a..e4a84a7d7906 100644 --- a/crates/polars-parquet/src/arrow/write/schema.rs +++ b/crates/polars-parquet/src/arrow/write/schema.rs @@ -2,16 +2,16 @@ use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use arrow::io::ipc::write::{default_ipc_fields, schema_to_bytes}; use base64::engine::general_purpose; use base64::Engine as _; -use parquet2::metadata::KeyValue; -use parquet2::schema::types::{ - GroupConvertedType, GroupLogicalType, IntegerType, ParquetType, PhysicalType, - PrimitiveConvertedType, PrimitiveLogicalType, TimeUnit as ParquetTimeUnit, -}; -use parquet2::schema::Repetition; use polars_error::{polars_bail, PolarsResult}; use super::super::ARROW_SCHEMA_META_KEY; use crate::arrow::write::decimal_length_from_precision; +use crate::parquet::metadata::KeyValue; +use crate::parquet::schema::types::{ + GroupConvertedType, GroupLogicalType, IntegerType, ParquetType, PhysicalType, + PrimitiveConvertedType, PrimitiveLogicalType, TimeUnit as ParquetTimeUnit, +}; +use crate::parquet::schema::Repetition; pub fn schema_to_metadata_key(schema: &Schema) -> KeyValue { let serialized_schema = schema_to_bytes(schema, &default_ipc_fields(&schema.fields)); diff --git a/crates/polars-parquet/src/arrow/write/sink.rs b/crates/polars-parquet/src/arrow/write/sink.rs index 16ffd4176d1d..1ddef77aa687 100644 --- a/crates/polars-parquet/src/arrow/write/sink.rs +++ b/crates/polars-parquet/src/arrow/write/sink.rs @@ -7,12 +7,12 @@ use arrow::chunk::Chunk; use arrow::datatypes::Schema; use futures::future::BoxFuture; use futures::{AsyncWrite, AsyncWriteExt, FutureExt, Sink, TryFutureExt}; -use parquet2::metadata::KeyValue; -use parquet2::write::{FileStreamer, WriteOptions as ParquetWriteOptions}; use polars_error::{polars_bail, to_compute_err, PolarsError, PolarsResult}; use super::file::add_arrow_schema; use 
super::{Encoding, SchemaDescriptor, WriteOptions}; +use crate::parquet::metadata::KeyValue; +use crate::parquet::write::{FileStreamer, WriteOptions as ParquetWriteOptions}; /// Sink that writes array [`chunks`](Chunk) as a Parquet file. /// diff --git a/crates/polars-parquet/src/arrow/write/utf8/basic.rs b/crates/polars-parquet/src/arrow/write/utf8/basic.rs index cb64dfd561f5..f1d874683fcd 100644 --- a/crates/polars-parquet/src/arrow/write/utf8/basic.rs +++ b/crates/polars-parquet/src/arrow/write/utf8/basic.rs @@ -1,14 +1,16 @@ use arrow::array::{Array, Utf8Array}; use arrow::offset::Offset; -use parquet2::encoding::Encoding; -use parquet2::page::DataPage; -use parquet2::schema::types::PrimitiveType; -use parquet2::statistics::{serialize_statistics, BinaryStatistics, ParquetStatistics, Statistics}; use polars_error::{polars_bail, PolarsResult}; use super::super::binary::{encode_delta, ord_binary}; use super::super::{utils, WriteOptions}; use crate::arrow::read::schema::is_nullable; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::DataPage; +use crate::parquet::schema::types::PrimitiveType; +use crate::parquet::statistics::{ + serialize_statistics, BinaryStatistics, ParquetStatistics, Statistics, +}; pub(crate) fn encode_plain( array: &Utf8Array, diff --git a/crates/polars-parquet/src/arrow/write/utf8/nested.rs b/crates/polars-parquet/src/arrow/write/utf8/nested.rs index a0a8640dde9f..1cc0a1f0523b 100644 --- a/crates/polars-parquet/src/arrow/write/utf8/nested.rs +++ b/crates/polars-parquet/src/arrow/write/utf8/nested.rs @@ -1,14 +1,14 @@ use arrow::array::{Array, Utf8Array}; use arrow::offset::Offset; -use parquet2::encoding::Encoding; -use parquet2::page::DataPage; -use parquet2::schema::types::PrimitiveType; use polars_error::PolarsResult; use super::super::{nested, utils, WriteOptions}; use super::basic::{build_statistics, encode_plain}; use crate::arrow::read::schema::is_nullable; use crate::arrow::write::Nested; +use crate::parquet::encoding::Encoding; +use crate::parquet::page::DataPage; +use crate::parquet::schema::types::PrimitiveType; pub fn array_to_page( array: &Utf8Array, diff --git a/crates/polars-parquet/src/arrow/write/utils.rs b/crates/polars-parquet/src/arrow/write/utils.rs index c7424e6e6f4d..6497240374c6 100644 --- a/crates/polars-parquet/src/arrow/write/utils.rs +++ b/crates/polars-parquet/src/arrow/write/utils.rs @@ -1,14 +1,14 @@ use arrow::bitmap::Bitmap; -use parquet2::compression::CompressionOptions; -use parquet2::encoding::hybrid_rle::encode_bool; -use parquet2::encoding::Encoding; -use parquet2::metadata::Descriptor; -use parquet2::page::{DataPage, DataPageHeader, DataPageHeaderV1, DataPageHeaderV2}; -use parquet2::schema::types::PrimitiveType; -use parquet2::statistics::ParquetStatistics; use polars_error::PolarsResult; use super::{Version, WriteOptions}; +use crate::parquet::compression::CompressionOptions; +use crate::parquet::encoding::hybrid_rle::encode_bool; +use crate::parquet::encoding::Encoding; +use crate::parquet::metadata::Descriptor; +use crate::parquet::page::{DataPage, DataPageHeader, DataPageHeaderV1, DataPageHeaderV2}; +use crate::parquet::schema::types::PrimitiveType; +use crate::parquet::statistics::ParquetStatistics; fn encode_iter_v1>(buffer: &mut Vec, iter: I) -> PolarsResult<()> { buffer.extend_from_slice(&[0; 4]); diff --git a/crates/polars-parquet/src/lib.rs b/crates/polars-parquet/src/lib.rs index 4b64c583ce23..ae45ad4df442 100644 --- a/crates/polars-parquet/src/lib.rs +++ b/crates/polars-parquet/src/lib.rs @@ -1,3 
+1,4 @@ #![allow(clippy::len_without_is_empty)] pub mod arrow; pub use arrow::{read, write}; +pub mod parquet; diff --git a/crates/polars-parquet/src/parquet/bloom_filter/hash.rs b/crates/polars-parquet/src/parquet/bloom_filter/hash.rs new file mode 100644 index 000000000000..c535faa44d76 --- /dev/null +++ b/crates/polars-parquet/src/parquet/bloom_filter/hash.rs @@ -0,0 +1,17 @@ +use xxhash_rust::xxh64::xxh64; + +use crate::parquet::types::NativeType; + +const SEED: u64 = 0; + +/// (xxh64) hash of a [`NativeType`]. +#[inline] +pub fn hash_native(value: T) -> u64 { + xxh64(value.to_le_bytes().as_ref(), SEED) +} + +/// (xxh64) hash of a sequence of bytes (e.g. ByteArray). +#[inline] +pub fn hash_byte>(value: A) -> u64 { + xxh64(value.as_ref(), SEED) +} diff --git a/crates/polars-parquet/src/parquet/bloom_filter/mod.rs b/crates/polars-parquet/src/parquet/bloom_filter/mod.rs new file mode 100644 index 000000000000..218715d7ac5f --- /dev/null +++ b/crates/polars-parquet/src/parquet/bloom_filter/mod.rs @@ -0,0 +1,71 @@ +//! API to read and use bloom filters +mod hash; +mod read; +mod split_block; + +pub use hash::{hash_byte, hash_native}; +pub use read::read; +pub use split_block::{insert, is_in_set}; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn basics() { + let mut bitset = vec![0; 32]; + + // insert + for a in 0..10i64 { + let hash = hash_native(a); + insert(&mut bitset, hash); + } + + // bloom filter produced by parquet-mr/spark for a column of i64 (0..=10) + /* + import pyspark.sql // 3.2.1 + spark = pyspark.sql.SparkSession.builder.getOrCreate() + spark.conf.set("parquet.bloom.filter.enabled", True) + spark.conf.set("parquet.bloom.filter.expected.ndv", 10) + spark.conf.set("parquet.bloom.filter.max.bytes", 32) + + data = [(i % 10,) for i in range(100)] + df = spark.createDataFrame(data, ["id"]).repartition(1) + + df.write.parquet("bla.parquet", mode = "overwrite") + */ + let expected: &[u8] = &[ + 24, 130, 24, 8, 134, 8, 68, 6, 2, 101, 128, 10, 64, 2, 38, 78, 114, 1, 64, 38, 1, 192, + 194, 152, 64, 70, 0, 36, 56, 121, 64, 0, + ]; + assert_eq!(bitset, expected); + + // check + for a in 0..11i64 { + let hash = hash_native(a); + + let valid = is_in_set(&bitset, hash); + + assert_eq!(a < 10, valid); + } + } + + #[test] + fn binary() { + let mut bitset = vec![0; 32]; + + // insert + for a in 0..10i64 { + let value = format!("a{}", a); + let hash = hash_byte(value); + insert(&mut bitset, hash); + } + + // bloom filter produced by parquet-mr/spark for a column of i64 f"a{i}" for i in 0..10 + let expected: &[u8] = &[ + 200, 1, 80, 20, 64, 68, 8, 109, 6, 37, 4, 67, 144, 80, 96, 32, 8, 132, 43, 33, 0, 5, + 99, 65, 2, 0, 224, 44, 64, 78, 96, 4, + ]; + assert_eq!(bitset, expected); + } +} diff --git a/crates/polars-parquet/src/parquet/bloom_filter/read.rs b/crates/polars-parquet/src/parquet/bloom_filter/read.rs new file mode 100644 index 000000000000..50c90a2e407a --- /dev/null +++ b/crates/polars-parquet/src/parquet/bloom_filter/read.rs @@ -0,0 +1,51 @@ +use std::io::{Read, Seek, SeekFrom}; + +use parquet_format_safe::thrift::protocol::TCompactInputProtocol; +use parquet_format_safe::{ + BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHeader, SplitBlockAlgorithm, + Uncompressed, +}; + +use crate::parquet::error::Error; +use crate::parquet::metadata::ColumnChunkMetaData; + +/// Reads the bloom filter associated to [`ColumnChunkMetaData`] into `bitset`. +/// Results in an empty `bitset` if there is no associated bloom filter or the algorithm is not supported. 
+/// # Error +/// Errors if the column contains no metadata or the filter can't be read or deserialized. +pub fn read( + column_metadata: &ColumnChunkMetaData, + mut reader: &mut R, + bitset: &mut Vec, +) -> Result<(), Error> { + let offset = column_metadata.metadata().bloom_filter_offset; + + let offset = if let Some(offset) = offset { + offset as u64 + } else { + bitset.clear(); + return Ok(()); + }; + reader.seek(SeekFrom::Start(offset))?; + + // deserialize header + let mut prot = TCompactInputProtocol::new(&mut reader, usize::MAX); // max is ok since `BloomFilterHeader` never allocates + let header = BloomFilterHeader::read_from_in_protocol(&mut prot)?; + + if header.algorithm != BloomFilterAlgorithm::BLOCK(SplitBlockAlgorithm {}) { + bitset.clear(); + return Ok(()); + } + if header.compression != BloomFilterCompression::UNCOMPRESSED(Uncompressed {}) { + bitset.clear(); + return Ok(()); + } + + let length: usize = header.num_bytes.try_into()?; + + bitset.clear(); + bitset.try_reserve(length)?; + reader.by_ref().take(length as u64).read_to_end(bitset)?; + + Ok(()) +} diff --git a/crates/polars-parquet/src/parquet/bloom_filter/split_block.rs b/crates/polars-parquet/src/parquet/bloom_filter/split_block.rs new file mode 100644 index 000000000000..576f4d5f1aba --- /dev/null +++ b/crates/polars-parquet/src/parquet/bloom_filter/split_block.rs @@ -0,0 +1,82 @@ +use std::convert::TryInto; + +/// magic numbers taken from https://github.com/apache/parquet-format/blob/master/BloomFilter.md +const SALT: [u32; 8] = [ + 1203114875, 1150766481, 2284105051, 2729912477, 1884591559, 770785867, 2667333959, 1550580529, +]; + +fn hash_to_block_index(hash: u64, len: usize) -> usize { + let number_of_blocks = len as u64 / 32; + let low_hash = hash >> 32; + let block_index = ((low_hash * number_of_blocks) >> 32) as u32; + block_index as usize +} + +fn new_mask(x: u32) -> [u32; 8] { + let mut a = [0u32; 8]; + for i in 0..8 { + let mask = x.wrapping_mul(SALT[i]); + let mask = mask >> 27; + let mask = 0x1 << mask; + a[i] = mask; + } + a +} + +/// loads a block from the bitset to the stack +#[inline] +fn load_block(bitset: &[u8]) -> [u32; 8] { + let mut a = [0u32; 8]; + let bitset = bitset.chunks_exact(4).take(8); + for (a, chunk) in a.iter_mut().zip(bitset) { + *a = u32::from_le_bytes(chunk.try_into().unwrap()) + } + a +} + +/// assigns a block from the stack to `bitset` +#[inline] +fn unload_block(block: [u32; 8], bitset: &mut [u8]) { + let bitset = bitset.chunks_exact_mut(4).take(8); + for (a, chunk) in block.iter().zip(bitset) { + let a = a.to_le_bytes(); + chunk[0] = a[0]; + chunk[1] = a[1]; + chunk[2] = a[2]; + chunk[3] = a[3]; + } +} + +/// Returns whether the `hash` is in the set +pub fn is_in_set(bitset: &[u8], hash: u64) -> bool { + let block_index = hash_to_block_index(hash, bitset.len()); + let key = hash as u32; + + let mask = new_mask(key); + let slice = &bitset[block_index * 32..(block_index + 1) * 32]; + let block_mask = load_block(slice); + + for i in 0..8 { + if mask[i] & block_mask[i] == 0 { + return false; + } + } + true +} + +/// Inserts a new hash to the set +pub fn insert(bitset: &mut [u8], hash: u64) { + let block_index = hash_to_block_index(hash, bitset.len()); + let key = hash as u32; + + let mask = new_mask(key); + let slice = &bitset[block_index * 32..(block_index + 1) * 32]; + let mut block_mask = load_block(slice); + + for i in 0..8 { + block_mask[i] |= mask[i]; + + let mut_slice = &mut bitset[block_index * 32..(block_index + 1) * 32]; + unload_block(block_mask, mut_slice) + } +} 
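Note for reviewers (illustrative, not part of the patch): the new `bloom_filter` module is composed of `read`, which copies a column chunk's serialized filter into a byte `bitset` (left empty when the chunk has no filter or uses an unsupported algorithm/compression), plus `hash_native`/`hash_byte` and `is_in_set` for membership probes against that bitset. A minimal sketch of row-group pruning built on these functions; the `can_skip_row_group` helper and the `polars_parquet::parquet::...` import paths are assumptions for illustration:

    use std::io::{Read, Seek};

    use polars_parquet::parquet::bloom_filter::{hash_native, is_in_set, read};
    use polars_parquet::parquet::error::Error;
    use polars_parquet::parquet::metadata::ColumnChunkMetaData;

    /// Hypothetical helper: returns `true` when the bloom filter proves that `value`
    /// cannot occur in this column chunk, so the enclosing row group may be skipped.
    fn can_skip_row_group<R: Read + Seek>(
        column: &ColumnChunkMetaData,
        reader: &mut R,
        value: i64,
    ) -> Result<bool, Error> {
        let mut bitset = vec![];
        read(column, reader, &mut bitset)?;
        if bitset.is_empty() {
            // No (supported) bloom filter was written for this chunk; cannot prune.
            return Ok(false);
        }
        // A negative probe is exact; a positive probe may be a false positive.
        Ok(!is_in_set(&bitset, hash_native(value)))
    }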
diff --git a/crates/polars-parquet/src/parquet/compression.rs b/crates/polars-parquet/src/parquet/compression.rs new file mode 100644 index 000000000000..9ec187ddee9b --- /dev/null +++ b/crates/polars-parquet/src/parquet/compression.rs @@ -0,0 +1,385 @@ +//! Functionality to compress and decompress data according to the parquet specification +pub use super::parquet_bridge::{ + BrotliLevel, Compression, CompressionOptions, GzipLevel, ZstdLevel, +}; +use crate::parquet::error::{Error, Result}; + +fn inner_compress Result, F: Fn(&[u8], &mut [u8]) -> Result>( + input: &[u8], + output: &mut Vec, + get_length: G, + compress: F, +) -> Result<()> { + let original_length = output.len(); + let max_required_length = get_length(input.len())?; + + output.resize(original_length + max_required_length, 0); + let compressed_size = compress(input, &mut output[original_length..])?; + + output.truncate(original_length + compressed_size); + Ok(()) +} + +/// Compresses data stored in slice `input_buf` and writes the compressed result +/// to `output_buf`. +/// Note that you'll need to call `clear()` before reusing the same `output_buf` +/// across different `compress` calls. +pub fn compress( + compression: CompressionOptions, + input_buf: &[u8], + output_buf: &mut Vec, +) -> Result<()> { + match compression { + #[cfg(feature = "brotli")] + CompressionOptions::Brotli(level) => { + use std::io::Write; + const BROTLI_DEFAULT_BUFFER_SIZE: usize = 4096; + const BROTLI_DEFAULT_LG_WINDOW_SIZE: u32 = 22; // recommended between 20-22 + + let q = level.unwrap_or_default(); + let mut encoder = brotli::CompressorWriter::new( + output_buf, + BROTLI_DEFAULT_BUFFER_SIZE, + q.compression_level(), + BROTLI_DEFAULT_LG_WINDOW_SIZE, + ); + encoder.write_all(input_buf)?; + encoder.flush().map_err(|e| e.into()) + }, + #[cfg(not(feature = "brotli"))] + CompressionOptions::Brotli(_) => Err(Error::FeatureNotActive( + crate::parquet::error::Feature::Brotli, + "compress to brotli".to_string(), + )), + #[cfg(feature = "gzip")] + CompressionOptions::Gzip(level) => { + use std::io::Write; + let level = level.unwrap_or_default(); + let mut encoder = flate2::write::GzEncoder::new(output_buf, level.into()); + encoder.write_all(input_buf)?; + encoder.try_finish().map_err(|e| e.into()) + }, + #[cfg(not(feature = "gzip"))] + CompressionOptions::Gzip(_) => Err(Error::FeatureNotActive( + crate::parquet::error::Feature::Gzip, + "compress to gzip".to_string(), + )), + #[cfg(feature = "snappy")] + CompressionOptions::Snappy => inner_compress( + input_buf, + output_buf, + |len| Ok(snap::raw::max_compress_len(len)), + |input, output| Ok(snap::raw::Encoder::new().compress(input, output)?), + ), + #[cfg(not(feature = "snappy"))] + CompressionOptions::Snappy => Err(Error::FeatureNotActive( + crate::parquet::error::Feature::Snappy, + "compress to snappy".to_string(), + )), + #[cfg(feature = "lz4")] + CompressionOptions::Lz4Raw => inner_compress( + input_buf, + output_buf, + |len| Ok(lz4::block::compress_bound(len)?), + |input, output| { + let compressed_size = lz4::block::compress_to_buffer(input, None, false, output)?; + Ok(compressed_size) + }, + ), + #[cfg(all(not(feature = "lz4"), not(feature = "lz4_flex")))] + CompressionOptions::Lz4Raw => Err(Error::FeatureNotActive( + crate::parquet::error::Feature::Lz4, + "compress to lz4".to_string(), + )), + #[cfg(feature = "zstd")] + CompressionOptions::Zstd(level) => { + use std::io::Write; + let level = level.map(|v| v.compression_level()).unwrap_or_default(); + + let mut encoder = 
zstd::Encoder::new(output_buf, level)?; + encoder.write_all(input_buf)?; + match encoder.finish() { + Ok(_) => Ok(()), + Err(e) => Err(e.into()), + } + }, + #[cfg(not(feature = "zstd"))] + CompressionOptions::Zstd(_) => Err(Error::FeatureNotActive( + crate::parquet::error::Feature::Zstd, + "compress to zstd".to_string(), + )), + CompressionOptions::Uncompressed => Err(Error::InvalidParameter( + "Compressing uncompressed".to_string(), + )), + _ => Err(Error::FeatureNotSupported(format!( + "Compression {:?} is not supported", + compression, + ))), + } +} + +/// Decompresses data stored in slice `input_buf` and writes output to `output_buf`. +/// Returns the total number of bytes written. +pub fn decompress(compression: Compression, input_buf: &[u8], output_buf: &mut [u8]) -> Result<()> { + match compression { + #[cfg(feature = "brotli")] + Compression::Brotli => { + use std::io::Read; + const BROTLI_DEFAULT_BUFFER_SIZE: usize = 4096; + brotli::Decompressor::new(input_buf, BROTLI_DEFAULT_BUFFER_SIZE) + .read_exact(output_buf) + .map_err(|e| e.into()) + }, + #[cfg(not(feature = "brotli"))] + Compression::Brotli => Err(Error::FeatureNotActive( + crate::parquet::error::Feature::Brotli, + "decompress with brotli".to_string(), + )), + #[cfg(feature = "gzip")] + Compression::Gzip => { + use std::io::Read; + let mut decoder = flate2::read::GzDecoder::new(input_buf); + decoder.read_exact(output_buf).map_err(|e| e.into()) + }, + #[cfg(not(feature = "gzip"))] + Compression::Gzip => Err(Error::FeatureNotActive( + crate::parquet::error::Feature::Gzip, + "decompress with gzip".to_string(), + )), + #[cfg(feature = "snappy")] + Compression::Snappy => { + use snap::raw::{decompress_len, Decoder}; + + let len = decompress_len(input_buf)?; + if len > output_buf.len() { + return Err(Error::OutOfSpec(String::from("snappy header out of spec"))); + } + Decoder::new() + .decompress(input_buf, output_buf) + .map_err(|e| e.into()) + .map(|_| ()) + }, + #[cfg(not(feature = "snappy"))] + Compression::Snappy => Err(Error::FeatureNotActive( + crate::parquet::error::Feature::Snappy, + "decompress with snappy".to_string(), + )), + #[cfg(all(feature = "lz4_flex", not(feature = "lz4")))] + Compression::Lz4Raw => lz4_flex::block::decompress_into(input_buf, output_buf) + .map(|_| {}) + .map_err(|e| e.into()), + #[cfg(feature = "lz4")] + Compression::Lz4Raw => { + lz4::block::decompress_to_buffer(input_buf, Some(output_buf.len() as i32), output_buf) + .map(|_| {}) + .map_err(|e| e.into()) + }, + #[cfg(all(not(feature = "lz4"), not(feature = "lz4_flex")))] + Compression::Lz4Raw => Err(Error::FeatureNotActive( + crate::parquet::error::Feature::Lz4, + "decompress with lz4".to_string(), + )), + + #[cfg(any(feature = "lz4_flex", feature = "lz4"))] + Compression::Lz4 => try_decompress_hadoop(input_buf, output_buf).or_else(|_| { + lz4_decompress_to_buffer(input_buf, Some(output_buf.len() as i32), output_buf) + .map(|_| {}) + }), + + #[cfg(all(not(feature = "lz4_flex"), not(feature = "lz4")))] + Compression::Lz4 => Err(Error::FeatureNotActive( + crate::parquet::error::Feature::Lz4, + "decompress with legacy lz4".to_string(), + )), + + #[cfg(feature = "zstd")] + Compression::Zstd => { + use std::io::Read; + let mut decoder = zstd::Decoder::new(input_buf)?; + decoder.read_exact(output_buf).map_err(|e| e.into()) + }, + #[cfg(not(feature = "zstd"))] + Compression::Zstd => Err(Error::FeatureNotActive( + crate::parquet::error::Feature::Zstd, + "decompress with zstd".to_string(), + )), + Compression::Uncompressed => 
Err(Error::InvalidParameter( + "Compressing uncompressed".to_string(), + )), + _ => Err(Error::FeatureNotSupported(format!( + "Compression {:?} is not supported", + compression, + ))), + } +} + +/// Try to decompress the buffer as if it was compressed with the Hadoop Lz4Codec. +/// Translated from the apache arrow c++ function [TryDecompressHadoop](https://github.com/apache/arrow/blob/bf18e6e4b5bb6180706b1ba0d597a65a4ce5ca48/cpp/src/arrow/util/compression_lz4.cc#L474). +/// Returns error if decompression failed. +#[cfg(any(feature = "lz4", feature = "lz4_flex"))] +fn try_decompress_hadoop(input_buf: &[u8], output_buf: &mut [u8]) -> Result<()> { + // Parquet files written with the Hadoop Lz4Codec use their own framing. + // The input buffer can contain an arbitrary number of "frames", each + // with the following structure: + // - bytes 0..3: big-endian uint32_t representing the frame decompressed size + // - bytes 4..7: big-endian uint32_t representing the frame compressed size + // - bytes 8...: frame compressed data + // + // The Hadoop Lz4Codec source code can be found here: + // https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/codec/Lz4Codec.cc + + const SIZE_U32: usize = std::mem::size_of::(); + const PREFIX_LEN: usize = SIZE_U32 * 2; + let mut input_len = input_buf.len(); + let mut input = input_buf; + let mut output_len = output_buf.len(); + let mut output: &mut [u8] = output_buf; + while input_len >= PREFIX_LEN { + let mut bytes = [0; SIZE_U32]; + bytes.copy_from_slice(&input[0..4]); + let expected_decompressed_size = u32::from_be_bytes(bytes); + let mut bytes = [0; SIZE_U32]; + bytes.copy_from_slice(&input[4..8]); + let expected_compressed_size = u32::from_be_bytes(bytes); + input = &input[PREFIX_LEN..]; + input_len -= PREFIX_LEN; + + if input_len < expected_compressed_size as usize { + return Err(Error::oos("Not enough bytes for Hadoop frame")); + } + + if output_len < expected_decompressed_size as usize { + return Err(Error::oos("Not enough bytes to hold advertised output")); + } + let decompressed_size = lz4_decompress_to_buffer( + &input[..expected_compressed_size as usize], + Some(output_len as i32), + output, + )?; + if decompressed_size != expected_decompressed_size as usize { + return Err(Error::oos("unexpected decompressed size")); + } + input_len -= expected_compressed_size as usize; + output_len -= expected_decompressed_size as usize; + if input_len > expected_compressed_size as usize { + input = &input[expected_compressed_size as usize..]; + output = &mut output[expected_decompressed_size as usize..]; + } else { + break; + } + } + if input_len == 0 { + Ok(()) + } else { + Err(Error::oos("Not all input are consumed")) + } +} + +#[cfg(feature = "lz4")] +#[inline] +fn lz4_decompress_to_buffer( + src: &[u8], + uncompressed_size: Option, + buffer: &mut [u8], +) -> Result { + let size = lz4::block::decompress_to_buffer(src, uncompressed_size, buffer)?; + Ok(size) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn test_roundtrip(c: CompressionOptions, data: &[u8]) { + let offset = 2048; + + // Compress to a buffer that already has data is possible + let mut compressed = vec![2; offset]; + compress(c, data, &mut compressed).expect("Error when compressing"); + + // data is compressed... 
+ assert!(compressed.len() - offset < data.len()); + + let mut decompressed = vec![0; data.len()]; + decompress(c.into(), &compressed[offset..], &mut decompressed) + .expect("Error when decompressing"); + assert_eq!(data, decompressed.as_slice()); + } + + fn test_codec(c: CompressionOptions) { + let sizes = vec![1000, 10000, 100000]; + for size in sizes { + let data = (0..size).map(|x| (x % 255) as u8).collect::>(); + test_roundtrip(c, &data); + } + } + + #[test] + fn test_codec_snappy() { + test_codec(CompressionOptions::Snappy); + } + + #[test] + fn test_codec_gzip_default() { + test_codec(CompressionOptions::Gzip(None)); + } + + #[test] + fn test_codec_gzip_low_compression() { + test_codec(CompressionOptions::Gzip(Some( + GzipLevel::try_new(1).unwrap(), + ))); + } + + #[test] + fn test_codec_gzip_high_compression() { + test_codec(CompressionOptions::Gzip(Some( + GzipLevel::try_new(10).unwrap(), + ))); + } + + #[test] + fn test_codec_brotli_default() { + test_codec(CompressionOptions::Brotli(None)); + } + + #[test] + fn test_codec_brotli_low_compression() { + test_codec(CompressionOptions::Brotli(Some( + BrotliLevel::try_new(1).unwrap(), + ))); + } + + #[test] + fn test_codec_brotli_high_compression() { + test_codec(CompressionOptions::Brotli(Some( + BrotliLevel::try_new(11).unwrap(), + ))); + } + + #[test] + fn test_codec_lz4_raw() { + test_codec(CompressionOptions::Lz4Raw); + } + + #[test] + fn test_codec_zstd_default() { + test_codec(CompressionOptions::Zstd(None)); + } + + #[cfg(feature = "zstd")] + #[test] + fn test_codec_zstd_low_compression() { + test_codec(CompressionOptions::Zstd(Some( + ZstdLevel::try_new(1).unwrap(), + ))); + } + + #[cfg(feature = "zstd")] + #[test] + fn test_codec_zstd_high_compression() { + test_codec(CompressionOptions::Zstd(Some( + ZstdLevel::try_new(21).unwrap(), + ))); + } +} diff --git a/crates/polars-parquet/src/parquet/deserialize/binary.rs b/crates/polars-parquet/src/parquet/deserialize/binary.rs new file mode 100644 index 000000000000..683b45e3bdc2 --- /dev/null +++ b/crates/polars-parquet/src/parquet/deserialize/binary.rs @@ -0,0 +1,70 @@ +use super::utils; +use crate::parquet::encoding::hybrid_rle; +use crate::parquet::encoding::plain_byte_array::BinaryIter; +use crate::parquet::error::Error; +use crate::parquet::page::{split_buffer, DataPage}; +use crate::parquet::parquet_bridge::{Encoding, Repetition}; + +#[derive(Debug)] +pub struct Dictionary<'a, P> { + pub indexes: hybrid_rle::HybridRleDecoder<'a>, + pub dict: P, +} + +impl<'a, P> Dictionary<'a, P> { + pub fn try_new(page: &'a DataPage, dict: P) -> Result { + let indexes = utils::dict_indices_decoder(page)?; + + Ok(Self { indexes, dict }) + } + + #[inline] + pub fn len(&self) -> usize { + self.indexes.size_hint().0 + } +} + +#[allow(clippy::large_enum_variant)] +pub enum BinaryPageState<'a, P> { + Optional(utils::DefLevelsDecoder<'a>, BinaryIter<'a>), + Required(BinaryIter<'a>), + RequiredDictionary(Dictionary<'a, P>), + OptionalDictionary(utils::DefLevelsDecoder<'a>, Dictionary<'a, P>), +} + +impl<'a, P> BinaryPageState<'a, P> { + pub fn try_new(page: &'a DataPage, dict: Option

) -> Result { + let is_optional = + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + + match (page.encoding(), dict, is_optional) { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false) => { + Dictionary::try_new(page, dict).map(Self::RequiredDictionary) + }, + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true) => { + Ok(Self::OptionalDictionary( + utils::DefLevelsDecoder::try_new(page)?, + Dictionary::try_new(page, dict)?, + )) + }, + (Encoding::Plain, _, true) => { + let (_, _, values) = split_buffer(page)?; + + let validity = utils::DefLevelsDecoder::try_new(page)?; + let values = BinaryIter::new(values, None); + + Ok(Self::Optional(validity, values)) + }, + (Encoding::Plain, _, false) => { + let (_, _, values) = split_buffer(page)?; + let values = BinaryIter::new(values, Some(page.num_values())); + + Ok(Self::Required(values)) + }, + _ => Err(Error::FeatureNotSupported(format!( + "Viewing page for encoding {:?} for binary type", + page.encoding(), + ))), + } + } +} diff --git a/crates/polars-parquet/src/parquet/deserialize/boolean.rs b/crates/polars-parquet/src/parquet/deserialize/boolean.rs new file mode 100644 index 000000000000..435e2b0abd30 --- /dev/null +++ b/crates/polars-parquet/src/parquet/deserialize/boolean.rs @@ -0,0 +1,39 @@ +use super::utils; +use crate::parquet::encoding::hybrid_rle::BitmapIter; +use crate::parquet::error::Error; +use crate::parquet::page::{split_buffer, DataPage}; +use crate::parquet::parquet_bridge::{Encoding, Repetition}; + +// The state of a `DataPage` of `Boolean` parquet boolean type +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +pub enum BooleanPageState<'a> { + Optional(utils::DefLevelsDecoder<'a>, BitmapIter<'a>), + Required(&'a [u8], usize), +} + +impl<'a> BooleanPageState<'a> { + pub fn try_new(page: &'a DataPage) -> Result { + let is_optional = + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + + match (page.encoding(), is_optional) { + (Encoding::Plain, true) => { + let validity = utils::DefLevelsDecoder::try_new(page)?; + + let (_, _, values) = split_buffer(page)?; + let values = BitmapIter::new(values, 0, values.len() * 8); + + Ok(Self::Optional(validity, values)) + }, + (Encoding::Plain, false) => { + let (_, _, values) = split_buffer(page)?; + Ok(Self::Required(values, page.num_values())) + }, + _ => Err(Error::InvalidParameter(format!( + "Viewing page for encoding {:?} for boolean type not supported", + page.encoding(), + ))), + } + } +} diff --git a/crates/polars-parquet/src/parquet/deserialize/filtered_rle.rs b/crates/polars-parquet/src/parquet/deserialize/filtered_rle.rs new file mode 100644 index 000000000000..57c95c77b401 --- /dev/null +++ b/crates/polars-parquet/src/parquet/deserialize/filtered_rle.rs @@ -0,0 +1,274 @@ +use std::collections::VecDeque; + +use super::{HybridDecoderBitmapIter, HybridEncoded}; +use crate::parquet::encoding::hybrid_rle::BitmapIter; +use crate::parquet::error::Error; +use crate::parquet::indexes::Interval; + +/// Type definition of a [`FilteredHybridBitmapIter`] of [`HybridDecoderBitmapIter`]. 
+pub type FilteredHybridRleDecoderIter<'a> = + FilteredHybridBitmapIter<'a, HybridDecoderBitmapIter<'a>>; + +/// The decoding state of the hybrid-RLE decoder with a maximum definition level of 1 +/// that supports skipped runs +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FilteredHybridEncoded<'a> { + /// a bitmap (values, offset, length, skipped_set) + Bitmap { + values: &'a [u8], + offset: usize, + length: usize, + }, + Repeated { + is_set: bool, + length: usize, + }, + /// When the run was skipped - contains the number of set values on the skipped run + Skipped(usize), +} + +fn is_set_count(values: &[u8], offset: usize, length: usize) -> usize { + BitmapIter::new(values, offset, length) + .filter(|x| *x) + .count() +} + +impl<'a> FilteredHybridEncoded<'a> { + /// Returns the length of the run in number of items + #[inline] + pub fn len(&self) -> usize { + match self { + FilteredHybridEncoded::Bitmap { length, .. } => *length, + FilteredHybridEncoded::Repeated { length, .. } => *length, + FilteredHybridEncoded::Skipped(_) => 0, + } + } + + #[must_use] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +/// An [`Iterator`] adapter over [`HybridEncoded`] that yields [`FilteredHybridEncoded`]. +/// +/// This iterator adapter is used in combination with +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FilteredHybridBitmapIter<'a, I: Iterator, Error>>> { + iter: I, + current: Option<(HybridEncoded<'a>, usize)>, + // a run may end in the middle of an interval, in which case we must + // split the interval in parts. This tracks the current interval being computed + current_interval: Option, + selected_rows: VecDeque, + current_items_in_runs: usize, + + total_items: usize, +} + +impl<'a, I: Iterator, Error>>> FilteredHybridBitmapIter<'a, I> { + pub fn new(iter: I, selected_rows: VecDeque) -> Self { + let total_items = selected_rows.iter().map(|x| x.length).sum(); + Self { + iter, + current: None, + current_interval: None, + selected_rows, + current_items_in_runs: 0, + total_items, + } + } + + fn advance_current_interval(&mut self, length: usize) { + if let Some(interval) = &mut self.current_interval { + interval.start += length; + interval.length -= length; + self.total_items -= length; + } + } + + /// Returns the number of elements remaining. Note that each run + /// of the iterator contains more than one element - this is _not_ equivalent to size_hint.
+ pub fn len(&self) -> usize { + self.total_items + } + + #[must_use] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl<'a, I: Iterator, Error>>> Iterator + for FilteredHybridBitmapIter<'a, I> +{ + type Item = Result, Error>; + + fn next(&mut self) -> Option { + let interval = if let Some(interval) = self.current_interval { + interval + } else { + self.current_interval = self.selected_rows.pop_front(); + self.current_interval?; // case where iteration finishes + return self.next(); + }; + + let (run, offset) = if let Some((run, offset)) = self.current { + (run, offset) + } else { + // a new run + let run = self.iter.next()?; // no run => something wrong since intervals should only slice items up all runs' length + match run { + Ok(run) => { + self.current = Some((run, 0)); + }, + Err(e) => return Some(Err(e)), + } + return self.next(); + }; + + // one of three things can happen: + // * the start of the interval is not aligned with the start of the run => issue a `Skipped` and advance the run / next run + // * the run contains this interval => consume the interval and keep the run + // * the run contains part of this interval => consume the run and keep the interval + + match run { + HybridEncoded::Repeated(is_set, full_run_length) => { + let run_length = full_run_length - offset; + // interval.start is from the start of the first run; discount `current_items_in_runs` + // to get the start from the current run's offset + let interval_start = interval.start - self.current_items_in_runs; + + if interval_start > 0 { + // we need to skip values from the run + let to_skip = interval_start; + + // we only skip up to a run (yield a single skip per multiple runs) + let max_skip = full_run_length - offset; + let to_skip = to_skip.min(max_skip); + + let set = if is_set { to_skip } else { 0 }; + + self.current_items_in_runs += to_skip; + + self.current = if to_skip == max_skip { + None + } else { + Some((run, offset + to_skip)) + }; + + return Some(Ok(FilteredHybridEncoded::Skipped(set))); + }; + + // slice the bitmap according to current interval + // note that interval start is from the start of the first run. 
+ let new_offset = offset + interval_start; + + if interval_start > run_length { + let set = if is_set { run_length } else { 0 }; + + self.advance_current_interval(run_length); + self.current_items_in_runs += run_length; + self.current = None; + Some(Ok(FilteredHybridEncoded::Skipped(set))) + } else { + let length = if run_length > interval.length { + // interval is fully consumed + self.current_items_in_runs += interval.length; + + // fetch next interval + self.total_items -= interval.length; + self.current_interval = self.selected_rows.pop_front(); + + self.current = Some((run, offset + interval.length)); + + interval.length + } else { + // the run is consumed and the interval is shortened accordingly + self.current_items_in_runs += run_length; + + // the interval may cover two runs; shorten the length + // to its maximum allowed for this run + let length = run_length.min(full_run_length - new_offset); + + self.advance_current_interval(length); + + self.current = None; + length + }; + Some(Ok(FilteredHybridEncoded::Repeated { is_set, length })) + } + }, + HybridEncoded::Bitmap(values, full_run_length) => { + let run_length = full_run_length - offset; + // interval.start is from the start of the first run; discount `current_items_in_runs` + // to get the start from the current run's offset + let interval_start = interval.start - self.current_items_in_runs; + + if interval_start > 0 { + // we need to skip values from the run + let to_skip = interval_start; + + // we only skip up to a run (yield a single skip per multiple runs) + let max_skip = full_run_length - offset; + let to_skip = to_skip.min(max_skip); + + let set = is_set_count(values, offset, to_skip); + + self.current_items_in_runs += to_skip; + + self.current = if to_skip == max_skip { + None + } else { + Some((run, offset + to_skip)) + }; + + return Some(Ok(FilteredHybridEncoded::Skipped(set))); + }; + + // slice the bitmap according to current interval + // note that interval start is from the start of the first run. 
+ let new_offset = offset + interval_start; + + if interval_start > run_length { + let set = is_set_count(values, offset, full_run_length); + + self.advance_current_interval(run_length); + self.current_items_in_runs += run_length; + self.current = None; + Some(Ok(FilteredHybridEncoded::Skipped(set))) + } else { + let length = if run_length > interval.length { + // interval is fully consumed + self.current_items_in_runs += interval.length; + + // fetch next interval + self.total_items -= interval.length; + self.current_interval = self.selected_rows.pop_front(); + + self.current = Some((run, offset + interval.length)); + + interval.length + } else { + // the run is consumed and the interval is shortened accordingly + self.current_items_in_runs += run_length; + + // the interval may cover two runs; shorten the length + // to its maximum allowed for this run + let length = run_length.min(full_run_length - new_offset); + + self.advance_current_interval(length); + + self.current = None; + length + }; + Some(Ok(FilteredHybridEncoded::Bitmap { + values, + offset: new_offset, + length, + })) + } + }, + } + } +} diff --git a/crates/polars-parquet/src/parquet/deserialize/fixed_len.rs b/crates/polars-parquet/src/parquet/deserialize/fixed_len.rs new file mode 100644 index 000000000000..b0885cc6ad4c --- /dev/null +++ b/crates/polars-parquet/src/parquet/deserialize/fixed_len.rs @@ -0,0 +1,107 @@ +use super::utils; +use crate::parquet::encoding::hybrid_rle; +use crate::parquet::error::Error; +use crate::parquet::page::{split_buffer, DataPage}; +use crate::parquet::parquet_bridge::{Encoding, Repetition}; +use crate::parquet::schema::types::PhysicalType; + +#[derive(Debug)] +pub struct FixexBinaryIter<'a> { + values: std::slice::ChunksExact<'a, u8>, +} + +impl<'a> FixexBinaryIter<'a> { + pub fn new(values: &'a [u8], size: usize) -> Self { + let values = values.chunks_exact(size); + Self { values } + } +} + +impl<'a> Iterator for FixexBinaryIter<'a> { + type Item = &'a [u8]; + + #[inline] + fn next(&mut self) -> Option { + self.values.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.values.size_hint() + } +} + +#[derive(Debug)] +pub struct Dictionary<'a, P> { + pub indexes: hybrid_rle::HybridRleDecoder<'a>, + pub dict: P, +} + +impl<'a, P> Dictionary<'a, P> { + pub fn try_new(page: &'a DataPage, dict: P) -> Result { + let indexes = utils::dict_indices_decoder(page)?; + + Ok(Self { indexes, dict }) + } + + #[inline] + pub fn len(&self) -> usize { + self.indexes.size_hint().0 + } +} + +#[allow(clippy::large_enum_variant)] +pub enum FixedLenBinaryPageState<'a, P> { + Optional(utils::DefLevelsDecoder<'a>, FixexBinaryIter<'a>), + Required(FixexBinaryIter<'a>), + RequiredDictionary(Dictionary<'a, P>), + OptionalDictionary(utils::DefLevelsDecoder<'a>, Dictionary<'a, P>), +} + +impl<'a, P> FixedLenBinaryPageState<'a, P> { + pub fn try_new(page: &'a DataPage, dict: Option
) -> Result { + let is_optional = + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + + let size: usize = if let PhysicalType::FixedLenByteArray(size) = + page.descriptor.primitive_type.physical_type + { + size + } else { + return Err(Error::InvalidParameter( + "FixedLenBinaryPageState must be initialized by pages of FixedLenByteArray" + .to_string(), + )); + }; + + match (page.encoding(), dict, is_optional) { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false) => { + Dictionary::try_new(page, dict).map(Self::RequiredDictionary) + }, + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true) => { + Ok(Self::OptionalDictionary( + utils::DefLevelsDecoder::try_new(page)?, + Dictionary::try_new(page, dict)?, + )) + }, + (Encoding::Plain, _, true) => { + let (_, _, values) = split_buffer(page)?; + + let validity = utils::DefLevelsDecoder::try_new(page)?; + let values = FixexBinaryIter::new(values, size); + + Ok(Self::Optional(validity, values)) + }, + (Encoding::Plain, _, false) => { + let (_, _, values) = split_buffer(page)?; + let values = FixexBinaryIter::new(values, size); + + Ok(Self::Required(values)) + }, + _ => Err(Error::FeatureNotSupported(format!( + "Viewing page for encoding {:?} for binary type", + page.encoding(), + ))), + } + } +} diff --git a/crates/polars-parquet/src/parquet/deserialize/hybrid_rle.rs b/crates/polars-parquet/src/parquet/deserialize/hybrid_rle.rs new file mode 100644 index 000000000000..746dd27b330d --- /dev/null +++ b/crates/polars-parquet/src/parquet/deserialize/hybrid_rle.rs @@ -0,0 +1,204 @@ +use crate::parquet::encoding::hybrid_rle::{self, BitmapIter}; +use crate::parquet::error::Error; + +/// The decoding state of the hybrid-RLE decoder with a maximum definition level of 1 +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum HybridEncoded<'a> { + /// a bitmap + Bitmap(&'a [u8], usize), + /// A repeated item. The first attribute corresponds to whether the value is set + /// the second attribute corresponds to the number of repetitions. + Repeated(bool, usize), +} + +impl<'a> HybridEncoded<'a> { + /// Returns the length of the run in number of items + #[inline] + pub fn len(&self) -> usize { + match self { + HybridEncoded::Bitmap(_, length) => *length, + HybridEncoded::Repeated(_, length) => *length, + } + } + + #[must_use] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +pub trait HybridRleRunsIterator<'a>: Iterator, Error>> { + /// Number of elements remaining. This may not be the items of the iterator - an item + /// of the iterator may contain more than one element. + fn number_of_elements(&self) -> usize; +} + +/// An iterator of [`HybridEncoded`], adapter over [`hybrid_rle::HybridEncoded`]. +#[derive(Debug, Clone)] +pub struct HybridRleIter<'a, I> +where + I: Iterator, Error>>, +{ + iter: I, + length: usize, + consumed: usize, +} + +impl<'a, I> HybridRleIter<'a, I> +where + I: Iterator, Error>>, +{ + /// Returns a new [`HybridRleIter`] + #[inline] + pub fn new(iter: I, length: usize) -> Self { + Self { + iter, + length, + consumed: 0, + } + } + + /// the number of elements in the iterator. Note that this _is not_ the number of runs. 
+ #[inline] + pub fn len(&self) -> usize { + self.length - self.consumed + } + + #[must_use] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl<'a, I> HybridRleRunsIterator<'a> for HybridRleIter<'a, I> +where + I: Iterator, Error>>, +{ + fn number_of_elements(&self) -> usize { + self.len() + } +} + +impl<'a, I> Iterator for HybridRleIter<'a, I> +where + I: Iterator, Error>>, +{ + type Item = Result, Error>; + + #[inline] + fn next(&mut self) -> Option { + if self.consumed == self.length { + return None; + }; + let run = self.iter.next()?; + + Some(run.map(|run| match run { + hybrid_rle::HybridEncoded::Bitpacked(pack) => { + // a pack has at most `pack.len() * 8` bits + let pack_size = pack.len() * 8; + + let additional = pack_size.min(self.len()); + + self.consumed += additional; + HybridEncoded::Bitmap(pack, additional) + }, + hybrid_rle::HybridEncoded::Rle(value, length) => { + let is_set = value[0] == 1; + + let additional = length.min(self.len()); + + self.consumed += additional; + HybridEncoded::Repeated(is_set, additional) + }, + })) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +/// Type definition for a [`HybridRleIter`] using [`hybrid_rle::Decoder`]. +pub type HybridDecoderBitmapIter<'a> = HybridRleIter<'a, hybrid_rle::Decoder<'a>>; + +#[derive(Debug)] +enum HybridBooleanState<'a> { + /// a bitmap + Bitmap(BitmapIter<'a>), + /// A repeated item. The first attribute corresponds to whether the value is set + /// the second attribute corresponds to the number of repetitions. + Repeated(bool, usize), +} + +/// An iterator adapter that maps an iterator of [`HybridEncoded`] into an iterator +/// over [`bool`]. +#[derive(Debug)] +pub struct HybridRleBooleanIter<'a, I> +where + I: Iterator, Error>>, +{ + iter: I, + current_run: Option>, +} + +impl<'a, I> HybridRleBooleanIter<'a, I> +where + I: Iterator, Error>>, +{ + pub fn new(iter: I) -> Self { + Self { + iter, + current_run: None, + } + } +} + +impl<'a, I> Iterator for HybridRleBooleanIter<'a, I> +where + I: HybridRleRunsIterator<'a>, +{ + type Item = Result; + + #[inline] + fn next(&mut self) -> Option { + if let Some(run) = &mut self.current_run { + match run { + HybridBooleanState::Bitmap(bitmap) => bitmap.next().map(Ok), + HybridBooleanState::Repeated(value, remaining) => if *remaining == 0 { + None + } else { + *remaining -= 1; + Some(*value) + } + .map(Ok), + } + } else if let Some(run) = self.iter.next() { + let run = run.map(|run| match run { + HybridEncoded::Bitmap(bitmap, length) => { + HybridBooleanState::Bitmap(BitmapIter::new(bitmap, 0, length)) + }, + HybridEncoded::Repeated(value, length) => { + HybridBooleanState::Repeated(value, length) + }, + }); + match run { + Ok(run) => { + self.current_run = Some(run); + self.next() + }, + Err(e) => Some(Err(e)), + } + } else { + None + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let exact = self.iter.number_of_elements(); + (exact, Some(exact)) + } +} + +/// Type definition for a [`HybridRleBooleanIter`] using [`hybrid_rle::Decoder`]. +pub type HybridRleDecoderIter<'a> = HybridRleBooleanIter<'a, HybridDecoderBitmapIter<'a>>; diff --git a/crates/polars-parquet/src/parquet/deserialize/mod.rs b/crates/polars-parquet/src/parquet/deserialize/mod.rs new file mode 100644 index 000000000000..f16fbb901bce --- /dev/null +++ b/crates/polars-parquet/src/parquet/deserialize/mod.rs @@ -0,0 +1,17 @@ +// TODO! fix and/or prune? 
+#![allow(ambiguous_glob_reexports)] +mod binary; +mod boolean; +mod filtered_rle; +mod fixed_len; +mod hybrid_rle; +mod native; +mod utils; + +pub use binary::*; +pub use boolean::*; +pub use filtered_rle::*; +pub use fixed_len::*; +pub use hybrid_rle::*; +pub use native::*; +pub use utils::{DefLevelsDecoder, OptionalValues, SliceFilteredIter}; diff --git a/crates/polars-parquet/src/parquet/deserialize/native.rs b/crates/polars-parquet/src/parquet/deserialize/native.rs new file mode 100644 index 000000000000..11a9cf2c3e26 --- /dev/null +++ b/crates/polars-parquet/src/parquet/deserialize/native.rs @@ -0,0 +1,97 @@ +use super::utils; +use crate::parquet::encoding::hybrid_rle; +use crate::parquet::error::Error; +use crate::parquet::page::{split_buffer, DataPage}; +use crate::parquet::parquet_bridge::{Encoding, Repetition}; +use crate::parquet::types::{decode, NativeType}; + +/// Typedef of an iterator over PLAIN page values +pub type Casted<'a, T> = std::iter::Map, fn(&'a [u8]) -> T>; + +/// Views the values of the data page as [`Casted`] to [`NativeType`]. +pub fn native_cast(page: &DataPage) -> Result, Error> { + let (_, _, values) = split_buffer(page)?; + if values.len() % std::mem::size_of::() != 0 { + return Err(Error::oos( + "A primitive page data's len must be a multiple of the type", + )); + } + + Ok(values + .chunks_exact(std::mem::size_of::()) + .map(decode::)) +} + +#[derive(Debug)] +pub struct Dictionary<'a, P> { + pub indexes: hybrid_rle::HybridRleDecoder<'a>, + pub dict: P, +} + +impl<'a, P> Dictionary<'a, P> { + pub fn try_new(page: &'a DataPage, dict: P) -> Result { + let indexes = utils::dict_indices_decoder(page)?; + + Ok(Self { dict, indexes }) + } + + pub fn len(&self) -> usize { + self.indexes.size_hint().0 + } + + #[must_use] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +/// The deserialization state of a `DataPage` of `Primitive` parquet primitive type +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +pub enum NativePageState<'a, T, P> +where + T: NativeType, +{ + /// A page of optional values + Optional(utils::DefLevelsDecoder<'a>, Casted<'a, T>), + /// A page of required values + Required(Casted<'a, T>), + /// A page of required, dictionary-encoded values + RequiredDictionary(Dictionary<'a, P>), + /// A page of optional, dictionary-encoded values + OptionalDictionary(utils::DefLevelsDecoder<'a>, Dictionary<'a, P>), +} + +impl<'a, T: NativeType, P> NativePageState<'a, T, P> { + /// Tries to create [`NativePageState`] + /// # Error + /// Errors iff the page is not a `NativePageState` + pub fn try_new(page: &'a DataPage, dict: Option
) -> Result { + let is_optional = + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + + match (page.encoding(), dict, is_optional) { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false) => { + Dictionary::try_new(page, dict).map(Self::RequiredDictionary) + }, + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true) => { + Ok(Self::OptionalDictionary( + utils::DefLevelsDecoder::try_new(page)?, + Dictionary::try_new(page, dict)?, + )) + }, + (Encoding::Plain, _, true) => { + let validity = utils::DefLevelsDecoder::try_new(page)?; + let values = native_cast(page)?; + + Ok(Self::Optional(validity, values)) + }, + (Encoding::Plain, _, false) => native_cast(page).map(Self::Required), + _ => Err(Error::FeatureNotSupported(format!( + "Viewing page for encoding {:?} for native type {}", + page.encoding(), + std::any::type_name::() + ))), + } + } +} diff --git a/crates/polars-parquet/src/parquet/deserialize/utils.rs b/crates/polars-parquet/src/parquet/deserialize/utils.rs new file mode 100644 index 000000000000..0c89d09d4648 --- /dev/null +++ b/crates/polars-parquet/src/parquet/deserialize/utils.rs @@ -0,0 +1,174 @@ +use std::collections::VecDeque; + +use super::hybrid_rle::{HybridDecoderBitmapIter, HybridRleIter}; +use crate::parquet::encoding::hybrid_rle::{self, HybridRleDecoder}; +use crate::parquet::error::Error; +use crate::parquet::indexes::Interval; +use crate::parquet::page::{split_buffer, DataPage}; +use crate::parquet::read::levels::get_bit_width; + +pub(super) fn dict_indices_decoder(page: &DataPage) -> Result { + let (_, _, indices_buffer) = split_buffer(page)?; + + // SPEC: Data page format: the bit width used to encode the entry ids stored as 1 byte (max bit width = 32), + // SPEC: followed by the values encoded using RLE/Bit packed described above (with the given bit width). + let bit_width = indices_buffer[0]; + if bit_width > 32 { + return Err(Error::oos( + "Bit width of dictionary pages cannot be larger than 32", + )); + } + let indices_buffer = &indices_buffer[1..]; + + hybrid_rle::HybridRleDecoder::try_new(indices_buffer, bit_width as u32, page.num_values()) +} + +/// Decoder of definition levels. +#[derive(Debug)] +pub enum DefLevelsDecoder<'a> { + /// When the maximum definition level is 1, the definition levels are RLE-encoded and + /// the bitpacked runs are bitmaps. This variant contains [`HybridDecoderBitmapIter`] + /// that decodes the runs, but not the individual values + Bitmap(HybridDecoderBitmapIter<'a>), + /// When the maximum definition level is larger than 1 + Levels(HybridRleDecoder<'a>, u32), +} + +impl<'a> DefLevelsDecoder<'a> { + pub fn try_new(page: &'a DataPage) -> Result { + let (_, def_levels, _) = split_buffer(page)?; + + let max_def_level = page.descriptor.max_def_level; + Ok(if max_def_level == 1 { + let iter = hybrid_rle::Decoder::new(def_levels, 1); + let iter = HybridRleIter::new(iter, page.num_values()); + Self::Bitmap(iter) + } else { + let iter = HybridRleDecoder::try_new( + def_levels, + get_bit_width(max_def_level), + page.num_values(), + )?; + Self::Levels(iter, max_def_level as u32) + }) + } +} + +/// Iterator adapter to convert an iterator of non-null values and an iterator over validity +/// into an iterator of optional values. 
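+///
+/// A minimal usage sketch (illustrative only, assuming this module's `Error` type is in scope):
+/// pairing a validity iterator with a values iterator yields optional values.
+/// ```ignore
+/// let validity = [true, false, true].into_iter().map(Ok::<bool, Error>);
+/// let values = [1i32, 2].into_iter();
+/// let decoded = OptionalValues::new(validity, values)
+///     .collect::<Result<Vec<Option<i32>>, Error>>()?;
+/// assert_eq!(decoded, vec![Some(1), None, Some(2)]);
+/// ```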
+#[derive(Debug, Clone)] +pub struct OptionalValues>, I: Iterator> { + validity: V, + values: I, +} + +impl>, I: Iterator> OptionalValues { + pub fn new(validity: V, values: I) -> Self { + Self { validity, values } + } +} + +impl>, I: Iterator> Iterator + for OptionalValues +{ + type Item = Result, Error>; + + #[inline] + fn next(&mut self) -> Option { + self.validity + .next() + .map(|x| x.map(|x| if x { self.values.next() } else { None })) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.validity.size_hint() + } +} + +/// An iterator adapter that converts an iterator over items into an iterator over slices of +/// those N items. +/// +/// This iterator is best used with iterators that implement `nth` since skipping items +/// allows this iterator to skip sequences of items without having to call each of them. +#[derive(Debug, Clone)] +pub struct SliceFilteredIter { + iter: I, + selected_rows: VecDeque, + current_remaining: usize, + current: usize, // position in the slice + total_length: usize, +} + +impl SliceFilteredIter { + /// Return a new [`SliceFilteredIter`] + pub fn new(iter: I, selected_rows: VecDeque) -> Self { + let total_length = selected_rows.iter().map(|i| i.length).sum(); + Self { + iter, + selected_rows, + current_remaining: 0, + current: 0, + total_length, + } + } +} + +impl> Iterator for SliceFilteredIter { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + if self.current_remaining == 0 { + if let Some(interval) = self.selected_rows.pop_front() { + // skip the hole between the previous start and this start + // (start + length) - start + let item = self.iter.nth(interval.start - self.current); + self.current = interval.start + interval.length; + self.current_remaining = interval.length - 1; + self.total_length -= 1; + item + } else { + None + } + } else { + self.current_remaining -= 1; + self.total_length -= 1; + self.iter.next() + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.total_length, Some(self.total_length)) + } +} + +#[cfg(test)] +mod test { + use std::collections::VecDeque; + + use super::*; + + #[test] + fn basic() { + let iter = 0..=100; + + let intervals = vec![ + Interval::new(0, 2), + Interval::new(20, 11), + Interval::new(31, 1), + ]; + + let a: VecDeque = intervals.clone().into_iter().collect(); + let mut a = SliceFilteredIter::new(iter, a); + + let expected: Vec = intervals + .into_iter() + .flat_map(|interval| interval.start..(interval.start + interval.length)) + .collect(); + + assert_eq!(expected, a.by_ref().collect::>()); + assert_eq!((0, Some(0)), a.size_hint()); + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/bitpacked/decode.rs b/crates/polars-parquet/src/parquet/encoding/bitpacked/decode.rs new file mode 100644 index 000000000000..ea7bde3fd45b --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/bitpacked/decode.rs @@ -0,0 +1,211 @@ +use super::{Packed, Unpackable, Unpacked}; +use crate::parquet::error::Error; + +/// An [`Iterator`] of [`Unpackable`] unpacked from a bitpacked slice of bytes. +/// # Implementation +/// This iterator unpacks bytes in chunks and does not allocate. +#[derive(Debug, Clone)] +pub struct Decoder<'a, T: Unpackable> { + packed: std::slice::Chunks<'a, u8>, + num_bits: usize, + remaining: usize, // in number of items + current_pack_index: usize, // invariant: < T::PACK_LENGTH + unpacked: T::Unpacked, // has the current unpacked values. 
+} + +#[inline] +fn decode_pack(packed: &[u8], num_bits: usize, unpacked: &mut T::Unpacked) { + if packed.len() < T::Unpacked::LENGTH * num_bits / 8 { + let mut buf = T::Packed::zero(); + buf.as_mut()[..packed.len()].copy_from_slice(packed); + T::unpack(buf.as_ref(), num_bits, unpacked) + } else { + T::unpack(packed, num_bits, unpacked) + } +} + +impl<'a, T: Unpackable> Decoder<'a, T> { + /// Returns a [`Decoder`] with `T` encoded in `packed` with `num_bits`. + pub fn try_new(packed: &'a [u8], num_bits: usize, mut length: usize) -> Result { + let block_size = std::mem::size_of::() * num_bits; + + if num_bits == 0 { + return Err(Error::oos("Bitpacking requires num_bits > 0")); + } + + if packed.len() * 8 < length * num_bits { + return Err(Error::oos(format!( + "Unpacking {length} items with a number of bits {num_bits} requires at least {} bytes.", + length * num_bits / 8 + ))); + } + + let mut packed = packed.chunks(block_size); + let mut unpacked = T::Unpacked::zero(); + if let Some(chunk) = packed.next() { + decode_pack::(chunk, num_bits, &mut unpacked); + } else { + length = 0 + }; + + Ok(Self { + remaining: length, + packed, + num_bits, + unpacked, + current_pack_index: 0, + }) + } +} + +impl<'a, T: Unpackable> Iterator for Decoder<'a, T> { + type Item = T; + + #[inline] // -71% improvement in bench + fn next(&mut self) -> Option { + if self.remaining == 0 { + return None; + } + let result = self.unpacked[self.current_pack_index]; + self.current_pack_index += 1; + self.remaining -= 1; + if self.current_pack_index == T::Unpacked::LENGTH { + if let Some(packed) = self.packed.next() { + decode_pack::(packed, self.num_bits, &mut self.unpacked); + self.current_pack_index = 0; + } + } + Some(result) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.remaining, Some(self.remaining)) + } +} + +#[cfg(test)] +mod tests { + use super::super::tests::case1; + use super::*; + + #[test] + fn test_decode_rle() { + // Test data: 0-7 with bit width 3 + // 0: 000 + // 1: 001 + // 2: 010 + // 3: 011 + // 4: 100 + // 5: 101 + // 6: 110 + // 7: 111 + let num_bits = 3; + let length = 8; + // encoded: 0b10001000u8, 0b11000110, 0b11111010 + let data = vec![0b10001000u8, 0b11000110, 0b11111010]; + + let decoded = Decoder::::try_new(&data, num_bits, length) + .unwrap() + .collect::>(); + assert_eq!(decoded, vec![0, 1, 2, 3, 4, 5, 6, 7]); + } + + #[test] + fn decode_large() { + let (num_bits, expected, data) = case1(); + + let decoded = Decoder::::try_new(&data, num_bits, expected.len()) + .unwrap() + .collect::>(); + assert_eq!(decoded, expected); + } + + #[test] + fn test_decode_bool() { + let num_bits = 1; + let length = 8; + let data = vec![0b10101010]; + + let decoded = Decoder::::try_new(&data, num_bits, length) + .unwrap() + .collect::>(); + assert_eq!(decoded, vec![0, 1, 0, 1, 0, 1, 0, 1]); + } + + #[test] + fn test_decode_u64() { + let num_bits = 1; + let length = 8; + let data = vec![0b10101010]; + + let decoded = Decoder::::try_new(&data, num_bits, length) + .unwrap() + .collect::>(); + assert_eq!(decoded, vec![0, 1, 0, 1, 0, 1, 0, 1]); + } + + #[test] + fn even_case() { + // [0, 1, 2, 3, 4, 5, 6, 0]x99 + let data = &[0b10001000u8, 0b11000110, 0b00011010]; + let num_bits = 3; + let copies = 99; // 8 * 99 % 32 != 0 + let expected = std::iter::repeat(&[0u32, 1, 2, 3, 4, 5, 6, 0]) + .take(copies) + .flatten() + .copied() + .collect::>(); + let data = std::iter::repeat(data) + .take(copies) + .flatten() + .copied() + .collect::>(); + let length = expected.len(); + + let decoded = 
Decoder::::try_new(&data, num_bits, length) + .unwrap() + .collect::>(); + assert_eq!(decoded, expected); + } + + #[test] + fn odd_case() { + // [0, 1, 2, 3, 4, 5, 6, 0]x4 + [2] + let data = &[0b10001000u8, 0b11000110, 0b00011010]; + let num_bits = 3; + let copies = 4; + let expected = std::iter::repeat(&[0u32, 1, 2, 3, 4, 5, 6, 0]) + .take(copies) + .flatten() + .copied() + .chain(std::iter::once(2)) + .collect::>(); + let data = std::iter::repeat(data) + .take(copies) + .flatten() + .copied() + .chain(std::iter::once(0b00000010u8)) + .collect::>(); + let length = expected.len(); + + let decoded = Decoder::::try_new(&data, num_bits, length) + .unwrap() + .collect::>(); + assert_eq!(decoded, expected); + } + + #[test] + fn test_errors() { + // zero length + assert!(Decoder::::try_new(&[], 1, 0).is_ok()); + // no bytes + assert!(Decoder::::try_new(&[], 1, 1).is_err()); + // too few bytes + assert!(Decoder::::try_new(&[1], 1, 8).is_ok()); + assert!(Decoder::::try_new(&[1, 1], 2, 8).is_ok()); + assert!(Decoder::::try_new(&[1], 1, 9).is_err()); + // zero num_bits + assert!(Decoder::::try_new(&[1], 0, 1).is_err()); + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/bitpacked/encode.rs b/crates/polars-parquet/src/parquet/encoding/bitpacked/encode.rs new file mode 100644 index 000000000000..904ff796dd34 --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/bitpacked/encode.rs @@ -0,0 +1,54 @@ +use std::convert::TryInto; + +use super::{Packed, Unpackable, Unpacked}; + +/// Encodes (packs) a slice of [`Unpackable`] into bitpacked bytes `packed`, using `num_bits` per value. +/// +/// This function assumes that the maximum value in `unpacked` fits in `num_bits` bits +/// and saturates higher values. +/// +/// Only the first `ceil8(unpacked.len() * num_bits)` of `packed` are populated. +pub fn encode(unpacked: &[T], num_bits: usize, packed: &mut [u8]) { + let chunks = unpacked.chunks_exact(T::Unpacked::LENGTH); + + let remainder = chunks.remainder(); + + let packed_size = (T::Unpacked::LENGTH * num_bits + 7) / 8; + if !remainder.is_empty() { + let packed_chunks = packed.chunks_mut(packed_size); + let mut last_chunk = T::Unpacked::zero(); + for i in 0..remainder.len() { + last_chunk[i] = remainder[i] + } + + chunks + .chain(std::iter::once(last_chunk.as_ref())) + .zip(packed_chunks) + .for_each(|(unpacked, packed)| { + T::pack(&unpacked.try_into().unwrap(), num_bits, packed); + }); + } else { + let packed_chunks = packed.chunks_exact_mut(packed_size); + chunks.zip(packed_chunks).for_each(|(unpacked, packed)| { + T::pack(&unpacked.try_into().unwrap(), num_bits, packed); + }); + } +} + +/// Encodes (packs) a potentially incomplete pack of [`Unpackable`] into bitpacked +/// bytes `packed`, using `num_bits` per value. +/// +/// This function assumes that the maximum value in `unpacked` fits in `num_bits` bits +/// and saturates higher values. +/// +/// Only the first `ceil8(unpacked.len() * num_bits)` of `packed` are populated. 
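+///
+/// A rough sketch (illustrative only): packing an incomplete pack of four `u32` values at
+/// 2 bits each. The output buffer must still hold a full pack, i.e. `32 * 2 / 8 = 8` bytes.
+/// ```ignore
+/// let unpacked = [0u32, 1, 2, 3]; // fewer than a full pack of 32 values
+/// let mut packed = vec![0u8; 8];
+/// encode_pack(&unpacked, 2, &mut packed);
+/// assert_eq!(packed[0], 0b11100100); // values are packed LSB-first
+/// ```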
+#[inline] +pub fn encode_pack(unpacked: &[T], num_bits: usize, packed: &mut [u8]) { + if unpacked.len() < T::Packed::LENGTH { + let mut complete_unpacked = T::Unpacked::zero(); + complete_unpacked.as_mut()[..unpacked.len()].copy_from_slice(unpacked); + T::pack(&complete_unpacked, num_bits, packed) + } else { + T::pack(&unpacked.try_into().unwrap(), num_bits, packed) + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/bitpacked/mod.rs b/crates/polars-parquet/src/parquet/encoding/bitpacked/mod.rs new file mode 100644 index 000000000000..a05ca2040431 --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/bitpacked/mod.rs @@ -0,0 +1,220 @@ +mod decode; +mod encode; +mod pack; +mod unpack; + +pub use decode::Decoder; +pub use encode::{encode, encode_pack}; + +/// A byte slice (e.g. `[u8; 8]`) denoting types that represent complete packs. +pub trait Packed: + Copy + + Sized + + AsRef<[u8]> + + AsMut<[u8]> + + std::ops::IndexMut + + for<'a> TryFrom<&'a [u8]> +{ + const LENGTH: usize; + fn zero() -> Self; +} + +impl Packed for [u8; 8] { + const LENGTH: usize = 8; + #[inline] + fn zero() -> Self { + [0; 8] + } +} + +impl Packed for [u8; 16 * 2] { + const LENGTH: usize = 16 * 2; + #[inline] + fn zero() -> Self { + [0; 16 * 2] + } +} + +impl Packed for [u8; 32 * 4] { + const LENGTH: usize = 32 * 4; + #[inline] + fn zero() -> Self { + [0; 32 * 4] + } +} + +impl Packed for [u8; 64 * 64] { + const LENGTH: usize = 64 * 64; + #[inline] + fn zero() -> Self { + [0; 64 * 64] + } +} + +/// A byte slice of [`Unpackable`] denoting complete unpacked arrays. +pub trait Unpacked: + Copy + + Sized + + AsRef<[T]> + + AsMut<[T]> + + std::ops::Index + + std::ops::IndexMut + + for<'a> TryFrom<&'a [T], Error = std::array::TryFromSliceError> +{ + const LENGTH: usize; + fn zero() -> Self; +} + +impl Unpacked for [u8; 8] { + const LENGTH: usize = 8; + #[inline] + fn zero() -> Self { + [0; 8] + } +} + +impl Unpacked for [u16; 16] { + const LENGTH: usize = 16; + #[inline] + fn zero() -> Self { + [0; 16] + } +} + +impl Unpacked for [u32; 32] { + const LENGTH: usize = 32; + #[inline] + fn zero() -> Self { + [0; 32] + } +} + +impl Unpacked for [u64; 64] { + const LENGTH: usize = 64; + #[inline] + fn zero() -> Self { + [0; 64] + } +} + +/// A type representing a type that can be bitpacked and unpacked by this crate. 
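+///
+/// A roundtrip sketch (illustrative only) for `u32` at 3 bits per value:
+/// ```ignore
+/// let unpacked = [7u32; 32];
+/// let mut packed = [0u8; 32 * 4];
+/// <u32 as Unpackable>::pack(&unpacked, 3, &mut packed);
+/// let mut roundtrip = [0u32; 32];
+/// <u32 as Unpackable>::unpack(&packed, 3, &mut roundtrip);
+/// assert_eq!(roundtrip, unpacked);
+/// ```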
+pub trait Unpackable: Copy + Sized + Default { + type Packed: Packed; + type Unpacked: Unpacked; + fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked); + fn pack(unpacked: &Self::Unpacked, num_bits: usize, packed: &mut [u8]); +} + +impl Unpackable for u8 { + type Packed = [u8; 8]; + type Unpacked = [u8; 8]; + + #[inline] + fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) { + unpack::unpack8(packed, unpacked, num_bits) + } + + #[inline] + fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) { + pack::pack8(packed, unpacked, num_bits) + } +} + +impl Unpackable for u16 { + type Packed = [u8; 16 * 2]; + type Unpacked = [u16; 16]; + + #[inline] + fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) { + unpack::unpack16(packed, unpacked, num_bits) + } + + #[inline] + fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) { + pack::pack16(packed, unpacked, num_bits) + } +} + +impl Unpackable for u32 { + type Packed = [u8; 32 * 4]; + type Unpacked = [u32; 32]; + + #[inline] + fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) { + unpack::unpack32(packed, unpacked, num_bits) + } + + #[inline] + fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) { + pack::pack32(packed, unpacked, num_bits) + } +} + +impl Unpackable for u64 { + type Packed = [u8; 64 * 64]; + type Unpacked = [u64; 64]; + + #[inline] + fn unpack(packed: &[u8], num_bits: usize, unpacked: &mut Self::Unpacked) { + unpack::unpack64(packed, unpacked, num_bits) + } + + #[inline] + fn pack(packed: &Self::Unpacked, num_bits: usize, unpacked: &mut [u8]) { + pack::pack64(packed, unpacked, num_bits) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + pub fn case1() -> (usize, Vec, Vec) { + let num_bits = 3; + let compressed = vec![ + 0b10001000u8, + 0b11000110, + 0b11111010, + 0b10001000u8, + 0b11000110, + 0b11111010, + 0b10001000u8, + 0b11000110, + 0b11111010, + 0b10001000u8, + 0b11000110, + 0b11111010, + 0b10001000u8, + 0b11000110, + 0b11111010, + ]; + let decompressed = vec![ + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, + 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, + ]; + (num_bits, decompressed, compressed) + } + + #[test] + fn encode_large() { + let (num_bits, unpacked, expected) = case1(); + let mut packed = vec![0u8; 4 * 32]; + + encode(&unpacked, num_bits, &mut packed); + assert_eq!(&packed[..15], expected); + } + + #[test] + fn test_encode() { + let num_bits = 3; + let unpacked = vec![0, 1, 2, 3, 4, 5, 6, 7]; + + let mut packed = vec![0u8; 4 * 32]; + + encode::(&unpacked, num_bits, &mut packed); + + let expected = vec![0b10001000u8, 0b11000110, 0b11111010]; + + assert_eq!(&packed[..3], expected); + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/bitpacked/pack.rs b/crates/polars-parquet/src/parquet/encoding/bitpacked/pack.rs new file mode 100644 index 000000000000..55183d36d641 --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/bitpacked/pack.rs @@ -0,0 +1,108 @@ +/// Macro that generates a packing function taking the number of bits as a const generic +macro_rules! 
pack_impl { + ($t:ty, $bytes:literal, $bits:tt) => { + pub fn pack(input: &[$t; $bits], output: &mut [u8]) { + if NUM_BITS == 0 { + for out in output { + *out = 0; + } + return; + } + assert!(NUM_BITS <= $bytes * 8); + assert!(output.len() >= NUM_BITS * $bytes); + + let mask = match NUM_BITS { + $bits => <$t>::MAX, + _ => ((1 << NUM_BITS) - 1), + }; + + for i in 0..$bits { + let start_bit = i * NUM_BITS; + let end_bit = start_bit + NUM_BITS; + + let start_bit_offset = start_bit % $bits; + let end_bit_offset = end_bit % $bits; + let start_byte = start_bit / $bits; + let end_byte = end_bit / $bits; + if start_byte != end_byte && end_bit_offset != 0 { + let a = input[i] << start_bit_offset; + let val_a = <$t>::to_le_bytes(a); + for i in 0..$bytes { + output[start_byte * $bytes + i] |= val_a[i] + } + + let b = (input[i] >> (NUM_BITS - end_bit_offset)) & mask; + let val_b = <$t>::to_le_bytes(b); + for i in 0..$bytes { + output[end_byte * $bytes + i] |= val_b[i] + } + } else { + let val = (input[i] & mask) << start_bit_offset; + let val = <$t>::to_le_bytes(val); + + for i in 0..$bytes { + output[start_byte * $bytes + i] |= val[i] + } + } + } + } + }; +} + +/// Macro that generates pack functions that accept num_bits as a parameter +macro_rules! pack { + ($name:ident, $t:ty, $bytes:literal, $bits:tt) => { + mod $name { + pack_impl!($t, $bytes, $bits); + } + + /// Pack unpacked `input` into `output` with a bit width of `num_bits` + pub fn $name(input: &[$t; $bits], output: &mut [u8], num_bits: usize) { + // This will get optimised into a jump table + seq_macro::seq!(i in 0..=$bits { + if i == num_bits { + return $name::pack::(input, output); + } + }); + unreachable!("invalid num_bits {}", num_bits); + } + }; +} + +pack!(pack8, u8, 1, 8); +pack!(pack16, u16, 2, 16); +pack!(pack32, u32, 4, 32); +pack!(pack64, u64, 8, 64); + +#[cfg(test)] +mod tests { + use super::super::unpack::*; + use super::*; + + #[test] + fn test_basic() { + let input = [0u16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + for num_bits in 4..16 { + let mut output = [0u8; 16 * 2]; + pack16(&input, &mut output, num_bits); + let mut other = [0u16; 16]; + unpack16(&output, &mut other, num_bits); + assert_eq!(other, input); + } + } + + #[test] + fn test_u32() { + let input = [ + 0u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0u32, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, + ]; + for num_bits in 4..32 { + let mut output = [0u8; 32 * 4]; + pack32(&input, &mut output, num_bits); + let mut other = [0u32; 32]; + unpack32(&output, &mut other, num_bits); + assert_eq!(other, input); + } + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/bitpacked/unpack.rs b/crates/polars-parquet/src/parquet/encoding/bitpacked/unpack.rs new file mode 100644 index 000000000000..061b3acef333 --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/bitpacked/unpack.rs @@ -0,0 +1,137 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Copied from https://github.com/apache/arrow-rs/blob/6859efa690d4c9530cf8a24053bc6ed81025a164/parquet/src/util/bit_pack.rs + +/// Macro that generates an unpack function taking the number of bits as a const generic +macro_rules! unpack_impl { + ($t:ty, $bytes:literal, $bits:tt) => { + pub fn unpack(input: &[u8], output: &mut [$t; $bits]) { + if NUM_BITS == 0 { + for out in output { + *out = 0; + } + return; + } + + assert!(NUM_BITS <= $bytes * 8); + + let mask = match NUM_BITS { + $bits => <$t>::MAX, + _ => ((1 << NUM_BITS) - 1), + }; + + assert!(input.len() >= NUM_BITS * $bytes); + + let r = |output_idx: usize| { + <$t>::from_le_bytes( + input[output_idx * $bytes..output_idx * $bytes + $bytes] + .try_into() + .unwrap(), + ) + }; + + seq_macro::seq!(i in 0..$bits { + let start_bit = i * NUM_BITS; + let end_bit = start_bit + NUM_BITS; + + let start_bit_offset = start_bit % $bits; + let end_bit_offset = end_bit % $bits; + let start_byte = start_bit / $bits; + let end_byte = end_bit / $bits; + if start_byte != end_byte && end_bit_offset != 0 { + let val = r(start_byte); + let a = val >> start_bit_offset; + let val = r(end_byte); + let b = val << (NUM_BITS - end_bit_offset); + + output[i] = a | (b & mask); + } else { + let val = r(start_byte); + output[i] = (val >> start_bit_offset) & mask; + } + }); + } + }; +} + +/// Macro that generates unpack functions that accept num_bits as a parameter +macro_rules! 
unpack { + ($name:ident, $t:ty, $bytes:literal, $bits:tt) => { + mod $name { + unpack_impl!($t, $bytes, $bits); + } + + /// Unpack packed `input` into `output` with a bit width of `num_bits` + pub fn $name(input: &[u8], output: &mut [$t; $bits], num_bits: usize) { + // This will get optimised into a jump table + seq_macro::seq!(i in 0..=$bits { + if i == num_bits { + return $name::unpack::(input, output); + } + }); + unreachable!("invalid num_bits {}", num_bits); + } + }; +} + +unpack!(unpack8, u8, 1, 8); +unpack!(unpack16, u16, 2, 16); +unpack!(unpack32, u32, 4, 32); +unpack!(unpack64, u64, 8, 64); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basic() { + let input = [0xFF; 4096]; + + for i in 0..=8 { + let mut output = [0; 8]; + unpack8(&input, &mut output, i); + for (idx, out) in output.iter().enumerate() { + assert_eq!(out.trailing_ones() as usize, i, "out[{}] = {}", idx, out); + } + } + + for i in 0..=16 { + let mut output = [0; 16]; + unpack16(&input, &mut output, i); + for (idx, out) in output.iter().enumerate() { + assert_eq!(out.trailing_ones() as usize, i, "out[{}] = {}", idx, out); + } + } + + for i in 0..=32 { + let mut output = [0; 32]; + unpack32(&input, &mut output, i); + for (idx, out) in output.iter().enumerate() { + assert_eq!(out.trailing_ones() as usize, i, "out[{}] = {}", idx, out); + } + } + + for i in 0..=64 { + let mut output = [0; 64]; + unpack64(&input, &mut output, i); + for (idx, out) in output.iter().enumerate() { + assert_eq!(out.trailing_ones() as usize, i, "out[{}] = {}", idx, out); + } + } + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs new file mode 100644 index 000000000000..378706541e55 --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/decoder.rs @@ -0,0 +1,362 @@ +use super::super::{bitpacked, uleb128, zigzag_leb128}; +use crate::parquet::encoding::ceil8; +use crate::parquet::error::Error; + +/// An [`Iterator`] of [`i64`] +#[derive(Debug)] +struct Block<'a> { + // this is the minimum delta that must be added to every value. + min_delta: i64, + _num_mini_blocks: usize, + /// Number of values that each mini block has. + values_per_mini_block: usize, + bitwidths: std::slice::Iter<'a, u8>, + values: &'a [u8], + remaining: usize, // number of elements + current_index: usize, // invariant: < values_per_mini_block + // None represents a relative delta of zero, in which case there is no miniblock. + current_miniblock: Option>, + // number of bytes consumed. 
+ consumed_bytes: usize, +} + +impl<'a> Block<'a> { + pub fn try_new( + mut values: &'a [u8], + num_mini_blocks: usize, + values_per_mini_block: usize, + length: usize, + ) -> Result { + let length = std::cmp::min(length, num_mini_blocks * values_per_mini_block); + + let mut consumed_bytes = 0; + let (min_delta, consumed) = zigzag_leb128::decode(values)?; + consumed_bytes += consumed; + values = &values[consumed..]; + + if num_mini_blocks > values.len() { + return Err(Error::oos( + "Block must contain at least num_mini_blocks bytes (the bitwidths)", + )); + } + let (bitwidths, remaining) = values.split_at(num_mini_blocks); + consumed_bytes += num_mini_blocks; + values = remaining; + + let mut block = Block { + min_delta, + _num_mini_blocks: num_mini_blocks, + values_per_mini_block, + bitwidths: bitwidths.iter(), + remaining: length, + values, + current_index: 0, + current_miniblock: None, + consumed_bytes, + }; + + // Set up first mini-block + block.advance_miniblock()?; + + Ok(block) + } + + fn advance_miniblock(&mut self) -> Result<(), Error> { + // unwrap is ok: we sliced it by num_mini_blocks in try_new + let num_bits = self.bitwidths.next().copied().unwrap() as usize; + + self.current_miniblock = if num_bits > 0 { + let length = std::cmp::min(self.remaining, self.values_per_mini_block); + + let miniblock_length = ceil8(self.values_per_mini_block * num_bits); + if miniblock_length > self.values.len() { + return Err(Error::oos( + "block must contain at least miniblock_length bytes (the mini block)", + )); + } + let (miniblock, remainder) = self.values.split_at(miniblock_length); + + self.values = remainder; + self.consumed_bytes += miniblock_length; + + Some(bitpacked::Decoder::try_new(miniblock, num_bits, length).unwrap()) + } else { + None + }; + self.current_index = 0; + + Ok(()) + } +} + +impl<'a> Iterator for Block<'a> { + type Item = Result; + + fn next(&mut self) -> Option { + if self.remaining == 0 { + return None; + } + let result = self.min_delta + + self + .current_miniblock + .as_mut() + .map(|x| x.next().unwrap_or_default()) + .unwrap_or(0) as i64; + self.current_index += 1; + self.remaining -= 1; + + if self.remaining > 0 && self.current_index == self.values_per_mini_block { + if let Err(e) = self.advance_miniblock() { + return Some(Err(e)); + } + } + + Some(Ok(result)) + } +} + +/// Decoder of parquets' `DELTA_BINARY_PACKED`. Implements `Iterator`. +/// # Implementation +/// This struct does not allocate on the heap. 
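+/// # Example
+/// A sketch mirroring the Spark-validated buffer in the tests below
+/// (block size 128, 4 mini-blocks, 5 values, first value 1, constant delta 1):
+/// ```ignore
+/// let data = &[128, 1, 4, 5, 2, 2, 0, 0, 0, 0];
+/// let decoded = Decoder::try_new(data)?.collect::<Result<Vec<i64>, _>>()?;
+/// assert_eq!(decoded, vec![1, 2, 3, 4, 5]);
+/// ```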
+#[derive(Debug)] +pub struct Decoder<'a> { + num_mini_blocks: usize, + values_per_mini_block: usize, + values_remaining: usize, + next_value: i64, + values: &'a [u8], + current_block: Option>, + // the total number of bytes consumed up to a given point, excluding the bytes on the current_block + consumed_bytes: usize, +} + +impl<'a> Decoder<'a> { + pub fn try_new(mut values: &'a [u8]) -> Result { + let mut consumed_bytes = 0; + let (block_size, consumed) = uleb128::decode(values)?; + consumed_bytes += consumed; + assert_eq!(block_size % 128, 0); + values = &values[consumed..]; + let (num_mini_blocks, consumed) = uleb128::decode(values)?; + let num_mini_blocks = num_mini_blocks as usize; + consumed_bytes += consumed; + values = &values[consumed..]; + let (total_count, consumed) = uleb128::decode(values)?; + let total_count = total_count as usize; + consumed_bytes += consumed; + values = &values[consumed..]; + let (first_value, consumed) = zigzag_leb128::decode(values)?; + consumed_bytes += consumed; + values = &values[consumed..]; + + let values_per_mini_block = block_size as usize / num_mini_blocks; + assert_eq!(values_per_mini_block % 8, 0); + + // If we only have one value (first_value), there are no blocks. + let current_block = if total_count > 1 { + Some(Block::try_new( + values, + num_mini_blocks, + values_per_mini_block, + total_count - 1, + )?) + } else { + None + }; + + Ok(Self { + num_mini_blocks, + values_per_mini_block, + values_remaining: total_count, + next_value: first_value, + values, + current_block, + consumed_bytes, + }) + } + + /// Returns the total number of bytes consumed up to this point by [`Decoder`]. + pub fn consumed_bytes(&self) -> usize { + self.consumed_bytes + self.current_block.as_ref().map_or(0, |b| b.consumed_bytes) + } + + fn load_delta(&mut self) -> Result { + // At this point we must have at least one block and value available + let current_block = self.current_block.as_mut().unwrap(); + if let Some(x) = current_block.next() { + x + } else { + // load next block + self.values = &self.values[current_block.consumed_bytes..]; + self.consumed_bytes += current_block.consumed_bytes; + + let next_block = Block::try_new( + self.values, + self.num_mini_blocks, + self.values_per_mini_block, + self.values_remaining, + ); + match next_block { + Ok(mut next_block) => { + let delta = next_block + .next() + .ok_or_else(|| Error::oos("Missing block"))?; + self.current_block = Some(next_block); + delta + }, + Err(e) => Err(e), + } + } + } +} + +impl<'a> Iterator for Decoder<'a> { + type Item = Result; + + fn next(&mut self) -> Option { + if self.values_remaining == 0 { + return None; + } + + let result = Some(Ok(self.next_value)); + + self.values_remaining -= 1; + if self.values_remaining == 0 { + // do not try to load another block + return result; + } + + let delta = match self.load_delta() { + Ok(delta) => delta, + Err(e) => return Some(Err(e)), + }; + + self.next_value += delta; + result + } + + fn size_hint(&self) -> (usize, Option) { + (self.values_remaining, Some(self.values_remaining)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn single_value() { + // Generated by parquet-rs + // + // header: [128, 1, 4, 1, 2] + // block size: 128, 1 + // mini-blocks: 4 + // elements: 1 + // first_value: 2 <=z> 1 + let data = &[128, 1, 4, 1, 2]; + + let mut decoder = Decoder::try_new(data).unwrap(); + let r = decoder.by_ref().collect::, _>>().unwrap(); + + assert_eq!(&r[..], &[1]); + assert_eq!(decoder.consumed_bytes(), 5); + } + + #[test] + fn 
test_from_spec() { + let expected = (1..=5).collect::>(); + // VALIDATED FROM SPARK==3.1.1 + // header: [128, 1, 4, 5, 2] + // block size: 128, 1 + // mini-blocks: 4 + // elements: 5 + // first_value: 2 <=z> 1 + // block1: [2, 0, 0, 0, 0] + // min_delta: 2 <=z> 1 + // bit_width: 0 + let data = &[128, 1, 4, 5, 2, 2, 0, 0, 0, 0]; + + let mut decoder = Decoder::try_new(data).unwrap(); + let r = decoder.by_ref().collect::, _>>().unwrap(); + + assert_eq!(expected, r); + + assert_eq!(decoder.consumed_bytes(), 10); + } + + #[test] + fn case2() { + let expected = vec![1, 2, 3, 4, 5, 1]; + // VALIDATED FROM SPARK==3.1.1 + // header: [128, 1, 4, 6, 2] + // block size: 128, 1 <=u> 128 + // mini-blocks: 4 <=u> 4 + // elements: 6 <=u> 6 + // first_value: 2 <=z> 1 + // block1: [7, 3, 0, 0, 0] + // min_delta: 7 <=z> -4 + // bit_widths: [3, 0, 0, 0] + // values: [ + // 0b01101101 + // 0b00001011 + // ... + // ] <=b> [3, 3, 3, 3, 0] + let data = &[ + 128, 1, 4, 6, 2, 7, 3, 0, 0, 0, 0b01101101, 0b00001011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // these should not be consumed + 1, 2, 3, + ]; + + let mut decoder = Decoder::try_new(data).unwrap(); + let r = decoder.by_ref().collect::, _>>().unwrap(); + + assert_eq!(expected, r); + assert_eq!(decoder.consumed_bytes(), data.len() - 3); + } + + #[test] + fn multiple_miniblocks() { + #[rustfmt::skip] + let data = &[ + // Header: [128, 1, 4, 65, 100] + 128, 1, // block size <=u> 128 + 4, // number of mini-blocks <=u> 4 + 65, // number of elements <=u> 65 + 100, // first_value <=z> 50 + + // Block 1 header: [7, 3, 4, 0, 0] + 7, // min_delta <=z> -4 + 3, 4, 255, 0, // bit_widths (255 should not be used as only two miniblocks are needed) + + // 32 3-bit values of 0 for mini-block 1 (12 bytes) + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + // 32 4-bit values of 8 for mini-block 2 (16 bytes) + 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, + 0x88, 0x88, + + // these should not be consumed + 1, 2, 3, + ]; + + #[rustfmt::skip] + let expected = [ + // First value + 50, + + // Mini-block 1: 32 deltas of -4 + 46, 42, 38, 34, 30, 26, 22, 18, 14, 10, 6, 2, -2, -6, -10, -14, -18, -22, -26, -30, -34, + -38, -42, -46, -50, -54, -58, -62, -66, -70, -74, -78, + + // Mini-block 2: 32 deltas of 4 + -74, -70, -66, -62, -58, -54, -50, -46, -42, -38, -34, -30, -26, -22, -18, -14, -10, -6, + -2, 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, + ]; + + let mut decoder = Decoder::try_new(data).unwrap(); + let r = decoder.by_ref().collect::, _>>().unwrap(); + + assert_eq!(&expected[..], &r[..]); + assert_eq!(decoder.consumed_bytes(), data.len() - 3); + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/encoder.rs b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/encoder.rs new file mode 100644 index 000000000000..9bdb861504d1 --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/encoder.rs @@ -0,0 +1,122 @@ +use super::super::{bitpacked, uleb128, zigzag_leb128}; +use crate::parquet::encoding::ceil8; + +/// Encodes an iterator of `i64` according to parquet's `DELTA_BINARY_PACKED`. +/// # Implementation +/// * This function does not allocate on the heap. +/// * The number of mini-blocks is always 1. This may change in the future. 
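+///
+/// A sketch mirroring the `constant_delta` test below:
+/// ```ignore
+/// let mut buffer = vec![];
+/// encode(1..=5i64, &mut buffer);
+/// assert_eq!(buffer, vec![128u8, 1, 1, 5, 2, 2, 0]);
+/// ```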
+pub fn encode>(mut iterator: I, buffer: &mut Vec) { + let block_size = 128; + let mini_blocks = 1; + + let mut container = [0u8; 10]; + let encoded_len = uleb128::encode(block_size, &mut container); + buffer.extend_from_slice(&container[..encoded_len]); + + let encoded_len = uleb128::encode(mini_blocks, &mut container); + buffer.extend_from_slice(&container[..encoded_len]); + + let length = iterator.size_hint().1.unwrap(); + let encoded_len = uleb128::encode(length as u64, &mut container); + buffer.extend_from_slice(&container[..encoded_len]); + + let mut values = [0i64; 128]; + let mut deltas = [0u64; 128]; + + let first_value = iterator.next().unwrap_or_default(); + let (container, encoded_len) = zigzag_leb128::encode(first_value); + buffer.extend_from_slice(&container[..encoded_len]); + + let mut prev = first_value; + let mut length = iterator.size_hint().1.unwrap(); + while length != 0 { + let mut min_delta = i64::MAX; + let mut max_delta = i64::MIN; + let mut num_bits = 0; + for (i, integer) in (0..128).zip(&mut iterator) { + let delta = integer - prev; + min_delta = min_delta.min(delta); + max_delta = max_delta.max(delta); + + num_bits = 64 - (max_delta - min_delta).leading_zeros(); + values[i] = delta; + prev = integer; + } + let consumed = std::cmp::min(length - iterator.size_hint().1.unwrap(), 128); + length = iterator.size_hint().1.unwrap(); + let values = &values[..consumed]; + + values.iter().zip(deltas.iter_mut()).for_each(|(v, delta)| { + *delta = (v - min_delta) as u64; + }); + + // + let (container, encoded_len) = zigzag_leb128::encode(min_delta); + buffer.extend_from_slice(&container[..encoded_len]); + + // one miniblock => 1 byte + buffer.push(num_bits as u8); + write_miniblock(buffer, num_bits as usize, deltas); + } +} + +fn write_miniblock(buffer: &mut Vec, num_bits: usize, deltas: [u64; 128]) { + if num_bits > 0 { + let start = buffer.len(); + + // bitpack encode all (deltas.len = 128 which is a multiple of 32) + let bytes_needed = start + ceil8(deltas.len() * num_bits); + buffer.resize(bytes_needed, 0); + bitpacked::encode(deltas.as_ref(), num_bits, &mut buffer[start..]); + + let bytes_needed = start + ceil8(deltas.len() * num_bits); + buffer.truncate(bytes_needed); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn constant_delta() { + // header: [128, 1, 1, 5, 2]: + // block size: 128 <=u> 128, 1 + // mini-blocks: 1 <=u> 1 + // elements: 5 <=u> 5 + // first_value: 2 <=z> 1 + // block1: [2, 0, 0, 0, 0] + // min_delta: 1 <=z> 2 + // bitwidth: 0 + let data = 1..=5; + let expected = vec![128u8, 1, 1, 5, 2, 2, 0]; + + let mut buffer = vec![]; + encode(data, &mut buffer); + assert_eq!(expected, buffer); + } + + #[test] + fn negative_min_delta() { + // max - min = 1 - -4 = 5 + let data = vec![1, 2, 3, 4, 5, 1]; + // header: [128, 1, 4, 6, 2] + // block size: 128 <=u> 128, 1 + // mini-blocks: 1 <=u> 1 + // elements: 6 <=u> 5 + // first_value: 2 <=z> 1 + // block1: [7, 3, 253, 255] + // min_delta: -4 <=z> 7 + // bitwidth: 3 + // values: [5, 5, 5, 5, 0] <=b> [ + // 0b01101101 + // 0b00001011 + // ] + let mut expected = vec![128u8, 1, 1, 6, 2, 7, 3, 0b01101101, 0b00001011]; + expected.extend(std::iter::repeat(0).take(128 * 3 / 8 - 2)); // 128 values, 3 bits, 2 already used + + let mut buffer = vec![]; + encode(data.into_iter(), &mut buffer); + assert_eq!(expected, buffer); + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs new file mode 100644 index 
000000000000..2ba0e953b83c --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/delta_bitpacked/mod.rs @@ -0,0 +1,90 @@ +mod decoder; +mod encoder; + +pub use decoder::Decoder; +pub use encoder::encode; + +#[cfg(test)] +mod tests { + use super::*; + use crate::parquet::error::Error; + + #[test] + fn basic() -> Result<(), Error> { + let data = vec![1, 3, 1, 2, 3]; + + let mut buffer = vec![]; + encode(data.clone().into_iter(), &mut buffer); + let iter = Decoder::try_new(&buffer)?; + + let result = iter.collect::, _>>()?; + assert_eq!(result, data); + Ok(()) + } + + #[test] + fn negative_value() -> Result<(), Error> { + let data = vec![1, 3, -1, 2, 3]; + + let mut buffer = vec![]; + encode(data.clone().into_iter(), &mut buffer); + let iter = Decoder::try_new(&buffer)?; + + let result = iter.collect::, _>>()?; + assert_eq!(result, data); + Ok(()) + } + + #[test] + fn some() -> Result<(), Error> { + let data = vec![ + -2147483648, + -1777158217, + -984917788, + -1533539476, + -731221386, + -1322398478, + 906736096, + ]; + + let mut buffer = vec![]; + encode(data.clone().into_iter(), &mut buffer); + let iter = Decoder::try_new(&buffer)?; + + let result = iter.collect::, Error>>()?; + assert_eq!(result, data); + Ok(()) + } + + #[test] + fn more_than_one_block() -> Result<(), Error> { + let mut data = vec![1, 3, -1, 2, 3, 10, 1]; + for x in 0..128 { + data.push(x - 10) + } + + let mut buffer = vec![]; + encode(data.clone().into_iter(), &mut buffer); + let iter = Decoder::try_new(&buffer)?; + + let result = iter.collect::, _>>()?; + assert_eq!(result, data); + Ok(()) + } + + #[test] + fn test_another() -> Result<(), Error> { + let data = vec![2, 3, 1, 2, 1]; + + let mut buffer = vec![]; + encode(data.clone().into_iter(), &mut buffer); + let len = buffer.len(); + let mut iter = Decoder::try_new(&buffer)?; + + let result = iter.by_ref().collect::, _>>()?; + assert_eq!(result, data); + + assert_eq!(iter.consumed_bytes(), len); + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/delta_byte_array/decoder.rs b/crates/polars-parquet/src/parquet/encoding/delta_byte_array/decoder.rs new file mode 100644 index 000000000000..0313e7890394 --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/delta_byte_array/decoder.rs @@ -0,0 +1,106 @@ +use super::super::{delta_bitpacked, delta_length_byte_array}; +use crate::parquet::error::Error; + +/// Decodes according to [Delta strings](https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-strings-delta_byte_array--7), +/// prefixes, lengths and values +/// # Implementation +/// This struct does not allocate on the heap. 
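+///
+/// A sketch of the two-phase API (prefix lengths first, then suffix lengths and values),
+/// mirroring the round-trip tests in this module; `encoded` is assumed to hold a
+/// `DELTA_BYTE_ARRAY` buffer:
+/// ```ignore
+/// let mut decoder = Decoder::try_new(&encoded)?;
+/// let prefix_lengths = decoder.by_ref().collect::<Result<Vec<u32>, _>>()?;
+/// let mut lengths = decoder.into_lengths()?;
+/// let suffix_lengths = lengths.by_ref().collect::<Result<Vec<i32>, _>>()?;
+/// let suffixes = lengths.values(); // concatenated suffix bytes
+/// ```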
+#[derive(Debug)] +pub struct Decoder<'a> { + values: &'a [u8], + prefix_lengths: delta_bitpacked::Decoder<'a>, +} + +impl<'a> Decoder<'a> { + pub fn try_new(values: &'a [u8]) -> Result { + let prefix_lengths = delta_bitpacked::Decoder::try_new(values)?; + Ok(Self { + values, + prefix_lengths, + }) + } + + pub fn into_lengths(self) -> Result, Error> { + assert_eq!(self.prefix_lengths.size_hint().0, 0); + delta_length_byte_array::Decoder::try_new( + &self.values[self.prefix_lengths.consumed_bytes()..], + ) + } +} + +impl<'a> Iterator for Decoder<'a> { + type Item = Result; + + fn next(&mut self) -> Option { + self.prefix_lengths.next().map(|x| x.map(|x| x as u32)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bla() -> Result<(), Error> { + // VALIDATED from spark==3.1.1 + let data = &[ + 128, 1, 4, 2, 0, 0, 0, 0, 0, 0, 128, 1, 4, 2, 10, 0, 0, 0, 0, 0, 72, 101, 108, 108, + 111, 87, 111, 114, 108, 100, + // extra bytes are not from spark, but they should be ignored by the decoder + // because they are beyond the sum of all lengths. + 1, 2, 3, + ]; + // result of encoding + let expected = &["Hello", "World"]; + let expected_lengths = expected.iter().map(|x| x.len() as i32).collect::>(); + let expected_prefixes = vec![0, 0]; + let expected_values = expected.join(""); + let expected_values = expected_values.as_bytes(); + + let mut decoder = Decoder::try_new(data)?; + let prefixes = decoder.by_ref().collect::, _>>()?; + assert_eq!(prefixes, expected_prefixes); + + // move to the lengths + let mut decoder = decoder.into_lengths()?; + + let lengths = decoder.by_ref().collect::, _>>()?; + assert_eq!(lengths, expected_lengths); + + // move to the values + let values = decoder.values(); + assert_eq!(values, expected_values); + Ok(()) + } + + #[test] + fn test_with_prefix() -> Result<(), Error> { + // VALIDATED from spark==3.1.1 + let data = &[ + 128, 1, 4, 2, 0, 6, 0, 0, 0, 0, 128, 1, 4, 2, 10, 4, 0, 0, 0, 0, 72, 101, 108, 108, + 111, 105, 99, 111, 112, 116, 101, 114, + // extra bytes are not from spark, but they should be ignored by the decoder + // because they are beyond the sum of all lengths. 
+ 1, 2, 3, + ]; + // result of encoding + let expected_lengths = vec![5, 7]; + let expected_prefixes = vec![0, 3]; + let expected_values = b"Helloicopter"; + + let mut decoder = Decoder::try_new(data)?; + let prefixes = decoder.by_ref().collect::, _>>()?; + assert_eq!(prefixes, expected_prefixes); + + // move to the lengths + let mut decoder = decoder.into_lengths()?; + + let lengths = decoder.by_ref().collect::, _>>()?; + assert_eq!(lengths, expected_lengths); + + // move to the values + let values = decoder.values(); + assert_eq!(values, expected_values); + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/delta_byte_array/encoder.rs b/crates/polars-parquet/src/parquet/encoding/delta_byte_array/encoder.rs new file mode 100644 index 000000000000..1e9e071c87be --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/delta_byte_array/encoder.rs @@ -0,0 +1,32 @@ +use super::super::delta_bitpacked; +use crate::parquet::encoding::delta_length_byte_array; + +/// Encodes an iterator of according to DELTA_BYTE_ARRAY +pub fn encode<'a, I: Iterator + Clone>(iterator: I, buffer: &mut Vec) { + let mut previous = b"".as_ref(); + + let mut sum_lengths = 0; + let prefixes = iterator + .clone() + .map(|item| { + let prefix_length = item + .iter() + .zip(previous.iter()) + .enumerate() + // find first difference + .find_map(|(length, (lhs, rhs))| (lhs != rhs).then_some(length)) + .unwrap_or(previous.len()); + previous = item; + + sum_lengths += item.len() - prefix_length; + prefix_length as i64 + }) + .collect::>(); + delta_bitpacked::encode(prefixes.iter().copied(), buffer); + + let remaining = iterator + .zip(prefixes) + .map(|(item, prefix)| &item[prefix as usize..]); + + delta_length_byte_array::encode(remaining, buffer); +} diff --git a/crates/polars-parquet/src/parquet/encoding/delta_byte_array/mod.rs b/crates/polars-parquet/src/parquet/encoding/delta_byte_array/mod.rs new file mode 100644 index 000000000000..9eab9c5f6ead --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/delta_byte_array/mod.rs @@ -0,0 +1,33 @@ +mod decoder; +mod encoder; + +pub use decoder::Decoder; +pub use encoder::encode; + +#[cfg(test)] +mod tests { + use super::*; + use crate::parquet::error::Error; + + #[test] + fn basic() -> Result<(), Error> { + let data = vec![b"Hello".as_ref(), b"Helicopter"]; + let mut buffer = vec![]; + encode(data.clone().into_iter(), &mut buffer); + + let mut decoder = Decoder::try_new(&buffer)?; + let prefixes = decoder.by_ref().collect::, _>>()?; + assert_eq!(prefixes, vec![0, 3]); + + // move to the lengths + let mut decoder = decoder.into_lengths()?; + + let lengths = decoder.by_ref().collect::, _>>()?; + assert_eq!(lengths, vec![5, 7]); + + // move to the values + let values = decoder.values(); + assert_eq!(values, b"Helloicopter"); + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/decoder.rs b/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/decoder.rs new file mode 100644 index 000000000000..df1dd2daaafb --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/decoder.rs @@ -0,0 +1,80 @@ +use super::super::delta_bitpacked; +use crate::parquet::error::Error; + +/// Decodes [Delta-length byte array](https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-length-byte-array-delta_length_byte_array--6) +/// lengths and values. +/// # Implementation +/// This struct does not allocate on the heap. 
+/// # Example +/// ``` +/// use crate::parquet::parquet::encoding::delta_length_byte_array::Decoder; +/// +/// let expected = &["Hello", "World"]; +/// let expected_lengths = expected.iter().map(|x| x.len() as i32).collect::>(); +/// let expected_values = expected.join(""); +/// let expected_values = expected_values.as_bytes(); +/// let data = &[ +/// 128, 1, 4, 2, 10, 0, 0, 0, 0, 0, 72, 101, 108, 108, 111, 87, 111, 114, 108, 100, +/// ]; +/// +/// let mut decoder = Decoder::try_new(data).unwrap(); +/// +/// // Extract the lengths +/// let lengths = decoder.by_ref().collect::, _>>().unwrap(); +/// assert_eq!(lengths, expected_lengths); +/// +/// // Extract the values. This _must_ be called after consuming all lengths by reference (see above). +/// let values = decoder.into_values(); +/// +/// assert_eq!(values, expected_values); +#[derive(Debug)] +pub struct Decoder<'a> { + values: &'a [u8], + lengths: delta_bitpacked::Decoder<'a>, + total_length: u32, +} + +impl<'a> Decoder<'a> { + pub fn try_new(values: &'a [u8]) -> Result { + let lengths = delta_bitpacked::Decoder::try_new(values)?; + Ok(Self { + values, + lengths, + total_length: 0, + }) + } + + /// Consumes this decoder and returns the slice of concatenated values. + /// # Panics + /// This function panics if this iterator has not been fully consumed. + pub fn into_values(self) -> &'a [u8] { + assert_eq!(self.lengths.size_hint().0, 0); + let start = self.lengths.consumed_bytes(); + &self.values[start..start + self.total_length as usize] + } + + /// Returns the slice of concatenated values. + /// # Panics + /// This function panics if this iterator has not yet been fully consumed. + pub fn values(&self) -> &'a [u8] { + assert_eq!(self.lengths.size_hint().0, 0); + let start = self.lengths.consumed_bytes(); + &self.values[start..start + self.total_length as usize] + } +} + +impl<'a> Iterator for Decoder<'a> { + type Item = Result; + + fn next(&mut self) -> Option { + let result = self.lengths.next(); + match result { + Some(Ok(v)) => { + self.total_length += v as u32; + Some(Ok(v as i32)) + }, + Some(Err(error)) => Some(Err(error)), + None => None, + } + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/encoder.rs b/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/encoder.rs new file mode 100644 index 000000000000..fc2121cf68e8 --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/encoder.rs @@ -0,0 +1,19 @@ +use crate::parquet::encoding::delta_bitpacked; + +/// Encodes a clonable iterator of `&[u8]` into `buffer`. This does not allocated on the heap. +/// # Implementation +/// This encoding is equivalent to call [`delta_bitpacked::encode`] on the lengths of the items +/// of the iterator followed by extending the buffer from each item of the iterator. 
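+/// # Example
+/// A minimal sketch of a round-trip with the [`Decoder`] from this module:
+/// ```ignore
+/// let mut buffer = vec![];
+/// encode(["aa", "bbb"].iter().map(|x| x.as_bytes()), &mut buffer);
+/// // `buffer` now holds the delta-bitpacked lengths [2, 3] followed by b"aabbb"
+/// let mut decoder = Decoder::try_new(&buffer).unwrap();
+/// let lengths = decoder.by_ref().collect::<Result<Vec<i32>, _>>().unwrap();
+/// assert_eq!(lengths, vec![2, 3]);
+/// assert_eq!(decoder.values(), b"aabbb");
+/// ```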
+pub fn encode, I: Iterator + Clone>(iterator: I, buffer: &mut Vec) { + let mut total_length = 0; + delta_bitpacked::encode( + iterator.clone().map(|x| { + let len = x.as_ref().len(); + total_length += len; + len as i64 + }), + buffer, + ); + buffer.reserve(total_length); + iterator.for_each(|x| buffer.extend(x.as_ref())) +} diff --git a/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/mod.rs b/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/mod.rs new file mode 100644 index 000000000000..91a42e3636ac --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/delta_length_byte_array/mod.rs @@ -0,0 +1,50 @@ +mod decoder; +mod encoder; + +pub use decoder::Decoder; +pub use encoder::encode; + +#[cfg(test)] +mod tests { + use super::*; + use crate::parquet::error::Error; + + #[test] + fn basic() -> Result<(), Error> { + let data = vec!["aa", "bbb", "a", "aa", "b"]; + + let mut buffer = vec![]; + encode(data.into_iter().map(|x| x.as_bytes()), &mut buffer); + + let mut iter = Decoder::try_new(&buffer)?; + + let result = iter.by_ref().collect::, _>>()?; + assert_eq!(result, vec![2, 3, 1, 2, 1]); + + let result = iter.values(); + assert_eq!(result, b"aabbbaaab".as_ref()); + Ok(()) + } + + #[test] + fn many_numbers() -> Result<(), Error> { + let mut data = vec![]; + for i in 0..136 { + data.push(format!("a{}", i)) + } + let expected_values = data.join(""); + let expected_lengths = data.iter().map(|x| x.len() as i32).collect::>(); + + let mut buffer = vec![]; + encode(data.into_iter(), &mut buffer); + + let mut iter = Decoder::try_new(&buffer)?; + + let result = iter.by_ref().collect::, _>>()?; + assert_eq!(result, expected_lengths); + + let result = iter.into_values(); + assert_eq!(result, expected_values.as_str().as_bytes()); + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/bitmap.rs b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/bitmap.rs new file mode 100644 index 000000000000..f46f22f84adb --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/bitmap.rs @@ -0,0 +1,102 @@ +use std::io::Write; + +const BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128]; + +/// Sets bit at position `i` in `byte` +#[inline] +pub fn set(byte: u8, i: usize) -> u8 { + byte | BIT_MASK[i] +} + +/// An [`Iterator`] of bool that decodes a bitmap. +/// This is a specialization of [`super::super::bitpacked::Decoder`] for `num_bits == 1`. +#[derive(Debug)] +pub struct BitmapIter<'a> { + iter: std::slice::Iter<'a, u8>, + current_byte: &'a u8, + remaining: usize, + mask: u8, +} + +impl<'a> BitmapIter<'a> { + /// Returns a new [`BitmapIter`]. 
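+ /// `offset` and `len` are expressed in bits: the iterator skips the first `offset` bits of
+ /// `slice` and decodes the next `len` bits as booleans.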
+ /// # Panics + /// This function panics iff `offset / 8 > slice.len()` + #[inline] + pub fn new(slice: &'a [u8], offset: usize, len: usize) -> Self { + let bytes = &slice[offset / 8..]; + + let mut iter = bytes.iter(); + + let current_byte = iter.next().unwrap_or(&0); + + Self { + iter, + mask: 1u8.rotate_left(offset as u32), + remaining: len, + current_byte, + } + } +} + +impl<'a> Iterator for BitmapIter<'a> { + type Item = bool; + + #[inline] + fn next(&mut self) -> Option { + // easily predictable in branching + if self.remaining == 0 { + return None; + } else { + self.remaining -= 1; + } + let value = self.current_byte & self.mask != 0; + self.mask = self.mask.rotate_left(1); + if self.mask == 1 { + // reached a new byte => try to fetch it from the iterator + if let Some(v) = self.iter.next() { + self.current_byte = v + } + } + Some(value) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.remaining, Some(self.remaining)) + } +} + +/// Writes an iterator of bools into writer, with LSB first. +pub fn encode_bool>( + writer: &mut W, + mut iterator: I, +) -> std::io::Result<()> { + // the length of the iterator. + let length = iterator.size_hint().1.unwrap(); + + let chunks = length / 8; + let reminder = length % 8; + + (0..chunks).try_for_each(|_| { + let mut byte = 0u8; + (0..8).for_each(|i| { + if iterator.next().unwrap() { + byte = set(byte, i) + } + }); + writer.write_all(&[byte]) + })?; + + if reminder != 0 { + let mut last = 0u8; + iterator.enumerate().for_each(|(i, value)| { + if value { + last = set(last, i) + } + }); + writer.write_all(&[last]) + } else { + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/decoder.rs b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/decoder.rs new file mode 100644 index 000000000000..859ed246c0e0 --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/decoder.rs @@ -0,0 +1,142 @@ +use super::super::{ceil8, uleb128}; +use super::HybridEncoded; +use crate::parquet::error::Error; + +/// An [`Iterator`] of [`HybridEncoded`]. +#[derive(Debug, Clone)] +pub struct Decoder<'a> { + values: &'a [u8], + num_bits: usize, +} + +impl<'a> Decoder<'a> { + /// Returns a new [`Decoder`] + pub fn new(values: &'a [u8], num_bits: usize) -> Self { + Self { values, num_bits } + } + + /// Returns the number of bits being used by this decoder. 
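+ /// This is the bit width of a single value, both for bitpacked runs and for the
+ /// (byte-aligned) repeated value of RLE runs.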
+ #[inline] + pub fn num_bits(&self) -> usize { + self.num_bits + } +} + +impl<'a> Iterator for Decoder<'a> { + type Item = Result, Error>; + + #[inline] // -18% improvement in bench + fn next(&mut self) -> Option { + if self.num_bits == 0 { + return None; + } + + if self.values.is_empty() { + return None; + } + + let (indicator, consumed) = match uleb128::decode(self.values) { + Ok((indicator, consumed)) => (indicator, consumed), + Err(e) => return Some(Err(e)), + }; + self.values = &self.values[consumed..]; + if self.values.is_empty() { + return None; + }; + + if indicator & 1 == 1 { + // is bitpacking + let bytes = (indicator as usize >> 1) * self.num_bits; + let bytes = std::cmp::min(bytes, self.values.len()); + let (result, remaining) = self.values.split_at(bytes); + self.values = remaining; + Some(Ok(HybridEncoded::Bitpacked(result))) + } else { + // is rle + let run_length = indicator as usize >> 1; + // repeated-value := value that is repeated, using a fixed-width of round-up-to-next-byte(bit-width) + let rle_bytes = ceil8(self.num_bits); + let (result, remaining) = self.values.split_at(rle_bytes); + self.values = remaining; + Some(Ok(HybridEncoded::Rle(result, run_length))) + } + } +} + +#[cfg(test)] +mod tests { + use super::super::super::bitpacked; + use super::*; + + #[test] + fn basics_1() { + let bit_width = 1usize; + let length = 5; + let values = vec![ + 2, 0, 0, 0, // length + 0b00000011, 0b00001011, // data + ]; + + let mut decoder = Decoder::new(&values[4..6], bit_width); + + let run = decoder.next().unwrap(); + + if let HybridEncoded::Bitpacked(values) = run.unwrap() { + assert_eq!(values, &[0b00001011]); + let result = bitpacked::Decoder::::try_new(values, bit_width, length) + .unwrap() + .collect::>(); + assert_eq!(result, &[1, 1, 0, 1, 0]); + } else { + panic!() + }; + } + + #[test] + fn basics_2() { + // This test was validated by the result of what pyarrow3 outputs when + // the bitmap is used. + let bit_width = 1; + let values = vec![ + 3, 0, 0, 0, // length + 0b00000101, 0b11101011, 0b00000010, // data + ]; + let expected = &[1, 1, 0, 1, 0, 1, 1, 1, 0, 1]; + + let mut decoder = Decoder::new(&values[4..4 + 3], bit_width); + + let run = decoder.next().unwrap(); + + if let HybridEncoded::Bitpacked(values) = run.unwrap() { + assert_eq!(values, &[0b11101011, 0b00000010]); + let result = bitpacked::Decoder::::try_new(values, bit_width, 10) + .unwrap() + .collect::>(); + assert_eq!(result, expected); + } else { + panic!() + }; + } + + #[test] + fn basics_3() { + let bit_width = 1; + let length = 8; + let values = vec![ + 2, 0, 0, 0, // length + 0b00010000, // data + 0b00000001, + ]; + + let mut decoder = Decoder::new(&values[4..4 + 2], bit_width); + + let run = decoder.next().unwrap(); + + if let HybridEncoded::Rle(values, items) = run.unwrap() { + assert_eq!(values, &[0b00000001]); + assert_eq!(items, length); + } else { + panic!() + }; + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs new file mode 100644 index 000000000000..c4523a7da53b --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs @@ -0,0 +1,166 @@ +use std::io::Write; + +use super::bitpacked_encode; +use crate::parquet::encoding::{bitpacked, ceil8, uleb128}; + +/// RLE-hybrid encoding of `u32`. This currently only yields bitpacked values. 
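+/// # Example
+/// A minimal sketch mirroring the tests below (nine 2-bit values, LSB first):
+/// ```ignore
+/// let mut buffer = vec![];
+/// encode_u32(&mut buffer, vec![0u32, 1, 2, 1, 2, 1, 1, 0, 3].into_iter(), 2).unwrap();
+/// // ULEB128 header (2 groups of 8 values, bitpacked) followed by the packed values
+/// assert_eq!(buffer, vec![(2 << 1 | 1), 0b01_10_01_00, 0b00_01_01_10, 0b00_00_00_11]);
+/// ```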
+pub fn encode_u32>( + writer: &mut W, + iterator: I, + num_bits: u32, +) -> std::io::Result<()> { + let num_bits = num_bits as u8; + // the length of the iterator. + let length = iterator.size_hint().1.unwrap(); + + // write the length + indicator + let mut header = ceil8(length) as u64; + header <<= 1; + header |= 1; // it is bitpacked => first bit is set + let mut container = [0; 10]; + let used = uleb128::encode(header, &mut container); + writer.write_all(&container[..used])?; + + bitpacked_encode_u32(writer, iterator, num_bits as usize)?; + + Ok(()) +} + +const U32_BLOCK_LEN: usize = 32; + +fn bitpacked_encode_u32>( + writer: &mut W, + mut iterator: I, + num_bits: usize, +) -> std::io::Result<()> { + // the length of the iterator. + let length = iterator.size_hint().1.unwrap(); + + let chunks = length / U32_BLOCK_LEN; + let remainder = length - chunks * U32_BLOCK_LEN; + let mut buffer = [0u32; U32_BLOCK_LEN]; + + let compressed_chunk_size = ceil8(U32_BLOCK_LEN * num_bits); + + for _ in 0..chunks { + iterator + .by_ref() + .take(U32_BLOCK_LEN) + .zip(buffer.iter_mut()) + .for_each(|(item, buf)| *buf = item); + + let mut packed = [0u8; 4 * U32_BLOCK_LEN]; + bitpacked::encode_pack::(&buffer, num_bits, packed.as_mut()); + writer.write_all(&packed[..compressed_chunk_size])?; + } + + if remainder != 0 { + let compressed_remainder_size = ceil8(remainder * num_bits); + iterator + .by_ref() + .take(remainder) + .zip(buffer.iter_mut()) + .for_each(|(item, buf)| *buf = item); + + let mut packed = [0u8; 4 * U32_BLOCK_LEN]; + bitpacked::encode_pack(&buffer, num_bits, packed.as_mut()); + writer.write_all(&packed[..compressed_remainder_size])?; + }; + Ok(()) +} + +/// the bitpacked part of the encoder. +pub fn encode_bool>( + writer: &mut W, + iterator: I, +) -> std::io::Result<()> { + // the length of the iterator. 
+ let length = iterator.size_hint().1.unwrap(); + + // write the length + indicator + let mut header = ceil8(length) as u64; + header <<= 1; + header |= 1; // it is bitpacked => first bit is set + let mut container = [0; 10]; + let used = uleb128::encode(header, &mut container); + + writer.write_all(&container[..used])?; + + // encode the iterator + bitpacked_encode(writer, iterator) +} + +#[cfg(test)] +mod tests { + use super::super::bitmap::BitmapIter; + use super::*; + + #[test] + fn bool_basics_1() -> std::io::Result<()> { + let iter = BitmapIter::new(&[0b10011101u8, 0b10011101], 0, 14); + + let mut vec = vec![]; + + encode_bool(&mut vec, iter)?; + + assert_eq!(vec, vec![(2 << 1 | 1), 0b10011101u8, 0b00011101]); + + Ok(()) + } + + #[test] + fn bool_from_iter() -> std::io::Result<()> { + let mut vec = vec![]; + + encode_bool( + &mut vec, + vec![true, true, true, true, true, true, true, true].into_iter(), + )?; + + assert_eq!(vec, vec![(1 << 1 | 1), 0b11111111]); + Ok(()) + } + + #[test] + fn test_encode_u32() -> std::io::Result<()> { + let mut vec = vec![]; + + encode_u32(&mut vec, vec![0, 1, 2, 1, 2, 1, 1, 0, 3].into_iter(), 2)?; + + assert_eq!( + vec, + vec![(2 << 1 | 1), 0b01_10_01_00, 0b00_01_01_10, 0b_00_00_00_11] + ); + Ok(()) + } + + #[test] + fn test_encode_u32_large() -> std::io::Result<()> { + let mut vec = vec![]; + + let values = (0..128).map(|x| x % 4); + + encode_u32(&mut vec, values, 2)?; + + let length = 128; + let expected = 0b11_10_01_00u8; + + let mut expected = vec![expected; length / 4]; + expected.insert(0, ((length / 8) as u8) << 1 | 1); + + assert_eq!(vec, expected); + Ok(()) + } + + #[test] + fn test_u32_other() -> std::io::Result<()> { + let values = vec![3, 3, 0, 3, 2, 3, 3, 3, 3, 1, 3, 3, 3, 0, 3].into_iter(); + + let mut vec = vec![]; + encode_u32(&mut vec, values, 2)?; + + let expected = vec![5, 207, 254, 247, 51]; + assert_eq!(expected, vec); + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/mod.rs b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/mod.rs new file mode 100644 index 000000000000..39e3a5bd2bac --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/mod.rs @@ -0,0 +1,263 @@ +// See https://github.com/apache/parquet-format/blob/master/Encodings.md#run-length-encoding--bit-packing-hybrid-rle--3 +mod bitmap; +mod decoder; +mod encoder; +pub use bitmap::{encode_bool as bitpacked_encode, BitmapIter}; +pub use decoder::Decoder; +pub use encoder::{encode_bool, encode_u32}; + +use super::bitpacked; +use crate::parquet::error::Error; + +/// The two possible states of an RLE-encoded run. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum HybridEncoded<'a> { + /// A bitpacked slice. The consumer must know its bit-width to unpack it. + Bitpacked(&'a [u8]), + /// A RLE-encoded slice. The first attribute corresponds to the slice (that can be interpreted) + /// the second attribute corresponds to the number of repetitions. + Rle(&'a [u8], usize), +} + +#[derive(Debug, Clone)] +enum State<'a> { + None, + Bitpacked(bitpacked::Decoder<'a, u32>), + Rle(std::iter::Take>), + // Add a special branch for a single value to + // adhere to the strong law of small numbers. 
+ Single(Option), +} + +/// [`Iterator`] of [`u32`] from a byte slice of Hybrid-RLE encoded values +#[derive(Debug, Clone)] +pub struct HybridRleDecoder<'a> { + decoder: Decoder<'a>, + state: State<'a>, + remaining: usize, +} + +#[inline] +fn read_next<'a>(decoder: &mut Decoder<'a>, remaining: usize) -> Result, Error> { + Ok(match decoder.next().transpose()? { + Some(HybridEncoded::Bitpacked(packed)) => { + let num_bits = decoder.num_bits(); + let length = std::cmp::min(packed.len() * 8 / num_bits, remaining); + let decoder = bitpacked::Decoder::::try_new(packed, num_bits, length)?; + State::Bitpacked(decoder) + }, + Some(HybridEncoded::Rle(pack, additional)) => { + let mut bytes = [0u8; std::mem::size_of::()]; + pack.iter().zip(bytes.iter_mut()).for_each(|(src, dst)| { + *dst = *src; + }); + let value = u32::from_le_bytes(bytes); + if additional == 1 { + State::Single(Some(value)) + } else { + State::Rle(std::iter::repeat(value).take(additional)) + } + }, + None => State::None, + }) +} + +impl<'a> HybridRleDecoder<'a> { + /// Returns a new [`HybridRleDecoder`] + pub fn try_new(data: &'a [u8], num_bits: u32, num_values: usize) -> Result { + let num_bits = num_bits as usize; + let mut decoder = Decoder::new(data, num_bits); + let state = read_next(&mut decoder, num_values)?; + Ok(Self { + decoder, + state, + remaining: num_values, + }) + } +} + +impl<'a> Iterator for HybridRleDecoder<'a> { + type Item = Result; + + fn next(&mut self) -> Option { + if self.remaining == 0 { + return None; + }; + let result = match &mut self.state { + State::Single(opt_val) => { + // make sure to take so that next calls will return 'None' + // indicating that the iterator is finished. + opt_val.take() + }, + State::Bitpacked(decoder) => decoder.next(), + State::Rle(iter) => iter.next(), + State::None => Some(0), + }; + if let Some(result) = result { + self.remaining -= 1; + Some(Ok(result)) + } else { + match read_next(&mut self.decoder, self.remaining) { + Ok(state) => { + self.state = state; + self.next() + }, + Err(e) => Some(Err(e)), + } + } + } + + fn size_hint(&self) -> (usize, Option) { + (self.remaining, Some(self.remaining)) + } +} + +impl<'a> ExactSizeIterator for HybridRleDecoder<'a> {} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn roundtrip() -> Result<(), Error> { + let mut buffer = vec![]; + let num_bits = 10u32; + + let data = (0..1000).collect::>(); + + encode_u32(&mut buffer, data.iter().cloned(), num_bits).unwrap(); + + let decoder = HybridRleDecoder::try_new(&buffer, num_bits, data.len())?; + + let result = decoder.collect::, _>>()?; + + assert_eq!(result, data); + Ok(()) + } + + #[test] + fn pyarrow_integration() -> Result<(), Error> { + // data encoded from pyarrow representing (0..1000) + let data = vec![ + 127, 0, 4, 32, 192, 0, 4, 20, 96, 192, 1, 8, 36, 160, 192, 2, 12, 52, 224, 192, 3, 16, + 68, 32, 193, 4, 20, 84, 96, 193, 5, 24, 100, 160, 193, 6, 28, 116, 224, 193, 7, 32, + 132, 32, 194, 8, 36, 148, 96, 194, 9, 40, 164, 160, 194, 10, 44, 180, 224, 194, 11, 48, + 196, 32, 195, 12, 52, 212, 96, 195, 13, 56, 228, 160, 195, 14, 60, 244, 224, 195, 15, + 64, 4, 33, 196, 16, 68, 20, 97, 196, 17, 72, 36, 161, 196, 18, 76, 52, 225, 196, 19, + 80, 68, 33, 197, 20, 84, 84, 97, 197, 21, 88, 100, 161, 197, 22, 92, 116, 225, 197, 23, + 96, 132, 33, 198, 24, 100, 148, 97, 198, 25, 104, 164, 161, 198, 26, 108, 180, 225, + 198, 27, 112, 196, 33, 199, 28, 116, 212, 97, 199, 29, 120, 228, 161, 199, 30, 124, + 244, 225, 199, 31, 128, 4, 34, 200, 32, 132, 20, 98, 200, 33, 136, 36, 162, 
200, 34, + 140, 52, 226, 200, 35, 144, 68, 34, 201, 36, 148, 84, 98, 201, 37, 152, 100, 162, 201, + 38, 156, 116, 226, 201, 39, 160, 132, 34, 202, 40, 164, 148, 98, 202, 41, 168, 164, + 162, 202, 42, 172, 180, 226, 202, 43, 176, 196, 34, 203, 44, 180, 212, 98, 203, 45, + 184, 228, 162, 203, 46, 188, 244, 226, 203, 47, 192, 4, 35, 204, 48, 196, 20, 99, 204, + 49, 200, 36, 163, 204, 50, 204, 52, 227, 204, 51, 208, 68, 35, 205, 52, 212, 84, 99, + 205, 53, 216, 100, 163, 205, 54, 220, 116, 227, 205, 55, 224, 132, 35, 206, 56, 228, + 148, 99, 206, 57, 232, 164, 163, 206, 58, 236, 180, 227, 206, 59, 240, 196, 35, 207, + 60, 244, 212, 99, 207, 61, 248, 228, 163, 207, 62, 252, 244, 227, 207, 63, 0, 5, 36, + 208, 64, 4, 21, 100, 208, 65, 8, 37, 164, 208, 66, 12, 53, 228, 208, 67, 16, 69, 36, + 209, 68, 20, 85, 100, 209, 69, 24, 101, 164, 209, 70, 28, 117, 228, 209, 71, 32, 133, + 36, 210, 72, 36, 149, 100, 210, 73, 40, 165, 164, 210, 74, 44, 181, 228, 210, 75, 48, + 197, 36, 211, 76, 52, 213, 100, 211, 77, 56, 229, 164, 211, 78, 60, 245, 228, 211, 79, + 64, 5, 37, 212, 80, 68, 21, 101, 212, 81, 72, 37, 165, 212, 82, 76, 53, 229, 212, 83, + 80, 69, 37, 213, 84, 84, 85, 101, 213, 85, 88, 101, 165, 213, 86, 92, 117, 229, 213, + 87, 96, 133, 37, 214, 88, 100, 149, 101, 214, 89, 104, 165, 165, 214, 90, 108, 181, + 229, 214, 91, 112, 197, 37, 215, 92, 116, 213, 101, 215, 93, 120, 229, 165, 215, 94, + 124, 245, 229, 215, 95, 128, 5, 38, 216, 96, 132, 21, 102, 216, 97, 136, 37, 166, 216, + 98, 140, 53, 230, 216, 99, 144, 69, 38, 217, 100, 148, 85, 102, 217, 101, 152, 101, + 166, 217, 102, 156, 117, 230, 217, 103, 160, 133, 38, 218, 104, 164, 149, 102, 218, + 105, 168, 165, 166, 218, 106, 172, 181, 230, 218, 107, 176, 197, 38, 219, 108, 180, + 213, 102, 219, 109, 184, 229, 166, 219, 110, 188, 245, 230, 219, 111, 192, 5, 39, 220, + 112, 196, 21, 103, 220, 113, 200, 37, 167, 220, 114, 204, 53, 231, 220, 115, 208, 69, + 39, 221, 116, 212, 85, 103, 221, 117, 216, 101, 167, 221, 118, 220, 117, 231, 221, 119, + 224, 133, 39, 222, 120, 228, 149, 103, 222, 121, 232, 165, 167, 222, 122, 236, 181, + 231, 222, 123, 240, 197, 39, 223, 124, 244, 213, 103, 223, 125, 125, 248, 229, 167, + 223, 126, 252, 245, 231, 223, 127, 0, 6, 40, 224, 128, 4, 22, 104, 224, 129, 8, 38, + 168, 224, 130, 12, 54, 232, 224, 131, 16, 70, 40, 225, 132, 20, 86, 104, 225, 133, 24, + 102, 168, 225, 134, 28, 118, 232, 225, 135, 32, 134, 40, 226, 136, 36, 150, 104, 226, + 137, 40, 166, 168, 226, 138, 44, 182, 232, 226, 139, 48, 198, 40, 227, 140, 52, 214, + 104, 227, 141, 56, 230, 168, 227, 142, 60, 246, 232, 227, 143, 64, 6, 41, 228, 144, 68, + 22, 105, 228, 145, 72, 38, 169, 228, 146, 76, 54, 233, 228, 147, 80, 70, 41, 229, 148, + 84, 86, 105, 229, 149, 88, 102, 169, 229, 150, 92, 118, 233, 229, 151, 96, 134, 41, + 230, 152, 100, 150, 105, 230, 153, 104, 166, 169, 230, 154, 108, 182, 233, 230, 155, + 112, 198, 41, 231, 156, 116, 214, 105, 231, 157, 120, 230, 169, 231, 158, 124, 246, + 233, 231, 159, 128, 6, 42, 232, 160, 132, 22, 106, 232, 161, 136, 38, 170, 232, 162, + 140, 54, 234, 232, 163, 144, 70, 42, 233, 164, 148, 86, 106, 233, 165, 152, 102, 170, + 233, 166, 156, 118, 234, 233, 167, 160, 134, 42, 234, 168, 164, 150, 106, 234, 169, + 168, 166, 170, 234, 170, 172, 182, 234, 234, 171, 176, 198, 42, 235, 172, 180, 214, + 106, 235, 173, 184, 230, 170, 235, 174, 188, 246, 234, 235, 175, 192, 6, 43, 236, 176, + 196, 22, 107, 236, 177, 200, 38, 171, 236, 178, 204, 54, 235, 236, 179, 208, 70, 43, + 237, 180, 212, 86, 107, 237, 181, 216, 102, 171, 
237, 182, 220, 118, 235, 237, 183, + 224, 134, 43, 238, 184, 228, 150, 107, 238, 185, 232, 166, 171, 238, 186, 236, 182, + 235, 238, 187, 240, 198, 43, 239, 188, 244, 214, 107, 239, 189, 248, 230, 171, 239, + 190, 252, 246, 235, 239, 191, 0, 7, 44, 240, 192, 4, 23, 108, 240, 193, 8, 39, 172, + 240, 194, 12, 55, 236, 240, 195, 16, 71, 44, 241, 196, 20, 87, 108, 241, 197, 24, 103, + 172, 241, 198, 28, 119, 236, 241, 199, 32, 135, 44, 242, 200, 36, 151, 108, 242, 201, + 40, 167, 172, 242, 202, 44, 183, 236, 242, 203, 48, 199, 44, 243, 204, 52, 215, 108, + 243, 205, 56, 231, 172, 243, 206, 60, 247, 236, 243, 207, 64, 7, 45, 244, 208, 68, 23, + 109, 244, 209, 72, 39, 173, 244, 210, 76, 55, 237, 244, 211, 80, 71, 45, 245, 212, 84, + 87, 109, 245, 213, 88, 103, 173, 245, 214, 92, 119, 237, 245, 215, 96, 135, 45, 246, + 216, 100, 151, 109, 246, 217, 104, 167, 173, 246, 218, 108, 183, 237, 246, 219, 112, + 199, 45, 247, 220, 116, 215, 109, 247, 221, 120, 231, 173, 247, 222, 124, 247, 237, + 247, 223, 128, 7, 46, 248, 224, 132, 23, 110, 248, 225, 136, 39, 174, 248, 226, 140, + 55, 238, 248, 227, 144, 71, 46, 249, 228, 148, 87, 110, 249, 229, 152, 103, 174, 249, + 230, 156, 119, 238, 249, 231, 160, 135, 46, 250, 232, 164, 151, 110, 250, 233, 168, + 167, 174, 250, 234, 172, 183, 238, 250, 235, 176, 199, 46, 251, 236, 180, 215, 110, + 251, 237, 184, 231, 174, 251, 238, 188, 247, 238, 251, 239, 192, 7, 47, 252, 240, 196, + 23, 111, 252, 241, 200, 39, 175, 252, 242, 204, 55, 239, 252, 243, 208, 71, 47, 253, + 244, 212, 87, 111, 253, 245, 216, 103, 175, 253, 246, 220, 119, 239, 253, 247, 224, + 135, 47, 254, 248, 228, 151, 111, 254, 249, + ]; + let num_bits = 10; + + let decoder = HybridRleDecoder::try_new(&data, num_bits, 1000)?; + + let result = decoder.collect::, _>>()?; + + assert_eq!(result, (0..1000).collect::>()); + Ok(()) + } + + #[test] + fn small() -> Result<(), Error> { + let data = vec![3, 2]; + + let num_bits = 3; + + let decoder = HybridRleDecoder::try_new(&data, num_bits, 1)?; + + let result = decoder.collect::, _>>()?; + + assert_eq!(result, &[2]); + Ok(()) + } + + #[test] + fn zero_bit_width() -> Result<(), Error> { + let data = vec![3]; + + let num_bits = 0; + + let decoder = HybridRleDecoder::try_new(&data, num_bits, 2)?; + + let result = decoder.collect::, _>>()?; + + assert_eq!(result, &[0, 0]); + Ok(()) + } + + #[test] + fn empty_values() -> Result<(), Error> { + let data = []; + + let num_bits = 1; + + let decoder = HybridRleDecoder::try_new(&data, num_bits, 100)?; + + let result = decoder.collect::, _>>()?; + + assert_eq!(result, vec![0; 100]); + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/mod.rs b/crates/polars-parquet/src/parquet/encoding/mod.rs new file mode 100644 index 000000000000..79b608ab63b7 --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/mod.rs @@ -0,0 +1,27 @@ +use std::convert::TryInto; + +pub mod bitpacked; +pub mod delta_bitpacked; +pub mod delta_byte_array; +pub mod delta_length_byte_array; +pub mod hybrid_rle; +pub mod plain_byte_array; +pub mod uleb128; +pub mod zigzag_leb128; + +pub use crate::parquet::parquet_bridge::Encoding; + +/// # Panics +/// This function panics iff `values.len() < 4`. 
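+/// # Example
+/// ```ignore
+/// // the first 4 bytes are a little-endian u32 length prefix
+/// assert_eq!(get_length(&[5, 0, 0, 0, b'H', b'e', b'l', b'l', b'o']), Some(5));
+/// ```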
+#[inline] +pub fn get_length(values: &[u8]) -> Option { + values + .get(0..4) + .map(|x| u32::from_le_bytes(x.try_into().unwrap()) as usize) +} + +/// Returns the ceil of value / 8 +#[inline] +pub fn ceil8(value: usize) -> usize { + value / 8 + ((value % 8 != 0) as usize) +} diff --git a/crates/polars-parquet/src/parquet/encoding/plain_byte_array.rs b/crates/polars-parquet/src/parquet/encoding/plain_byte_array.rs new file mode 100644 index 000000000000..d29f8c82c6de --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/plain_byte_array.rs @@ -0,0 +1,46 @@ +/// Decodes according to [Plain strings](https://github.com/apache/parquet-format/blob/master/Encodings.md#plain-plain--0), +/// prefixes, lengths and values +/// # Implementation +/// This struct does not allocate on the heap. +use crate::parquet::error::Error; + +#[derive(Debug)] +pub struct BinaryIter<'a> { + values: &'a [u8], + length: Option, +} + +impl<'a> BinaryIter<'a> { + pub fn new(values: &'a [u8], length: Option) -> Self { + Self { values, length } + } +} + +impl<'a> Iterator for BinaryIter<'a> { + type Item = Result<&'a [u8], Error>; + + #[inline] + fn next(&mut self) -> Option { + if self.values.len() < 4 { + return None; + } + if let Some(x) = self.length.as_mut() { + *x = x.saturating_sub(1) + } + let length = u32::from_le_bytes(self.values[0..4].try_into().unwrap()) as usize; + self.values = &self.values[4..]; + if length > self.values.len() { + return Some(Err(Error::oos( + "A string in plain encoding declares a length that is out of range", + ))); + } + let (result, remaining) = self.values.split_at(length); + self.values = remaining; + Some(Ok(result)) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.length.unwrap_or_default(), self.length) + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/uleb128.rs b/crates/polars-parquet/src/parquet/encoding/uleb128.rs new file mode 100644 index 000000000000..c91568e2ee86 --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/uleb128.rs @@ -0,0 +1,97 @@ +use crate::parquet::error::Error; + +pub fn decode(values: &[u8]) -> Result<(u64, usize), Error> { + let mut result = 0; + let mut shift = 0; + + let mut consumed = 0; + for byte in values { + consumed += 1; + if shift == 63 && *byte > 1 { + panic!() + }; + + result |= u64::from(byte & 0b01111111) << shift; + + if byte & 0b10000000 == 0 { + break; + } + + shift += 7; + } + Ok((result, consumed)) +} + +/// Encodes `value` in ULEB128 into `container`. The exact number of bytes written +/// depends on `value`, and cannot be determined upfront. The maximum number of bytes +/// required are 10. +/// # Panic +/// This function may panic if `container.len() < 10` and `value` requires more bytes. 
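+/// # Example
+/// A sketch of a round-trip through [`decode`], using the value from the tests below:
+/// ```ignore
+/// let mut container = [0u8; 10];
+/// let used = encode(624_485, &mut container);
+/// assert_eq!(&container[..used], &[0xE5, 0x8E, 0x26]);
+/// assert_eq!(decode(&container[..used]).unwrap(), (624_485, 3));
+/// ```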
+pub fn encode(mut value: u64, container: &mut [u8]) -> usize { + let mut consumed = 0; + let mut iter = container.iter_mut(); + loop { + let mut byte = (value as u8) & !128; + value >>= 7; + if value != 0 { + byte |= 128; + } + *iter.next().unwrap() = byte; + consumed += 1; + if value == 0 { + break; + } + } + consumed +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn decode_1() { + let data = vec![0xe5, 0x8e, 0x26, 0xDE, 0xAD, 0xBE, 0xEF]; + let (value, len) = decode(&data).unwrap(); + assert_eq!(value, 624_485); + assert_eq!(len, 3); + } + + #[test] + fn decode_2() { + let data = vec![0b00010000, 0b00000001, 0b00000011, 0b00000011]; + let (value, len) = decode(&data).unwrap(); + assert_eq!(value, 16); + assert_eq!(len, 1); + } + + #[test] + fn round_trip() { + let original = 123124234u64; + let mut container = [0u8; 10]; + let encoded_len = encode(original, &mut container); + let (value, len) = decode(&container).unwrap(); + assert_eq!(value, original); + assert_eq!(len, encoded_len); + } + + #[test] + fn min_value() { + let original = u64::MIN; + let mut container = [0u8; 10]; + let encoded_len = encode(original, &mut container); + let (value, len) = decode(&container).unwrap(); + assert_eq!(value, original); + assert_eq!(len, encoded_len); + } + + #[test] + fn max_value() { + let original = u64::MAX; + let mut container = [0u8; 10]; + let encoded_len = encode(original, &mut container); + let (value, len) = decode(&container).unwrap(); + assert_eq!(value, original); + assert_eq!(len, encoded_len); + } +} diff --git a/crates/polars-parquet/src/parquet/encoding/zigzag_leb128.rs b/crates/polars-parquet/src/parquet/encoding/zigzag_leb128.rs new file mode 100644 index 000000000000..0a673136cc73 --- /dev/null +++ b/crates/polars-parquet/src/parquet/encoding/zigzag_leb128.rs @@ -0,0 +1,69 @@ +use super::uleb128; +use crate::parquet::error::Error; + +pub fn decode(values: &[u8]) -> Result<(i64, usize), Error> { + let (u, consumed) = uleb128::decode(values)?; + Ok(((u >> 1) as i64 ^ -((u & 1) as i64), consumed)) +} + +pub fn encode(value: i64) -> ([u8; 10], usize) { + let value = ((value << 1) ^ (value >> (64 - 1))) as u64; + let mut a = [0u8; 10]; + let produced = uleb128::encode(value, &mut a); + (a, produced) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_decode() { + // see e.g. https://stackoverflow.com/a/2211086/931303 + let cases = vec![ + (0u8, 0i64), + (1, -1), + (2, 1), + (3, -2), + (4, 2), + (5, -3), + (6, 3), + (7, -4), + (8, 4), + (9, -5), + ]; + for (data, expected) in cases { + let (result, _) = decode(&[data]).unwrap(); + assert_eq!(result, expected) + } + } + + #[test] + fn test_encode() { + let cases = vec![ + (0u8, 0i64), + (1, -1), + (2, 1), + (3, -2), + (4, 2), + (5, -3), + (6, 3), + (7, -4), + (8, 4), + (9, -5), + ]; + for (expected, data) in cases { + let (result, size) = encode(data); + assert_eq!(size, 1); + assert_eq!(result[0], expected) + } + } + + #[test] + fn test_roundtrip() { + let value = -1001212312; + let (data, size) = encode(value); + let (result, _) = decode(&data[..size]).unwrap(); + assert_eq!(value, result); + } +} diff --git a/crates/polars-parquet/src/parquet/error.rs b/crates/polars-parquet/src/parquet/error.rs new file mode 100644 index 000000000000..78022fd5d4ec --- /dev/null +++ b/crates/polars-parquet/src/parquet/error.rs @@ -0,0 +1,134 @@ +//! Contains [`Error`] + +/// List of features whose non-activation may cause a runtime error. 
+/// Used to indicate which lack of feature caused [`Error::FeatureNotActive`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum Feature { + /// Snappy compression and decompression + Snappy, + /// Brotli compression and decompression + Brotli, + /// Gzip compression and decompression + Gzip, + /// Lz4 raw compression and decompression + Lz4, + /// Zstd compression and decompression + Zstd, +} + +/// Errors generated by this crate +#[derive(Debug, Clone)] +#[non_exhaustive] +pub enum Error { + /// When the parquet file is known to be out of spec. + OutOfSpec(String), + /// Error presented when trying to use a code branch that requires activating a feature. + FeatureNotActive(Feature, String), + /// Error presented when trying to use a feature from parquet that is not yet supported + FeatureNotSupported(String), + /// When encoding, the user passed an invalid parameter + InvalidParameter(String), + /// When decoding or decompressing, the page would allocate more memory than allowed + WouldOverAllocate, +} + +impl Error { + pub(crate) fn oos>(message: I) -> Self { + Self::OutOfSpec(message.into()) + } +} + +impl std::error::Error for Error {} + +impl std::fmt::Display for Error { + fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Error::OutOfSpec(message) => { + write!(fmt, "File out of specification: {}", message) + }, + Error::FeatureNotActive(feature, reason) => { + write!( + fmt, + "The feature \"{:?}\" needs to be active to {}", + feature, reason + ) + }, + Error::FeatureNotSupported(reason) => { + write!(fmt, "Not yet supported: {}", reason) + }, + Error::InvalidParameter(message) => { + write!(fmt, "Invalid parameter: {}", message) + }, + Error::WouldOverAllocate => { + write!(fmt, "Operation would exceed memory use threshold") + }, + } + } +} + +#[cfg(feature = "snappy")] +impl From for Error { + fn from(e: snap::Error) -> Error { + Error::OutOfSpec(format!("underlying snap error: {}", e)) + } +} + +#[cfg(feature = "lz4_flex")] +impl From for Error { + fn from(e: lz4_flex::block::DecompressError) -> Error { + Error::OutOfSpec(format!("underlying lz4_flex error: {}", e)) + } +} + +#[cfg(feature = "lz4_flex")] +impl From for Error { + fn from(e: lz4_flex::block::CompressError) -> Error { + Error::OutOfSpec(format!("underlying lz4_flex error: {}", e)) + } +} + +impl From for Error { + fn from(e: parquet_format_safe::thrift::Error) -> Error { + Error::OutOfSpec(format!("Invalid thrift: {}", e)) + } +} + +impl From for Error { + fn from(e: std::io::Error) -> Error { + Error::OutOfSpec(format!("underlying IO error: {}", e)) + } +} + +impl From for Error { + fn from(e: std::collections::TryReserveError) -> Error { + Error::OutOfSpec(format!("OOM: {}", e)) + } +} + +impl From for Error { + fn from(e: std::num::TryFromIntError) -> Error { + Error::OutOfSpec(format!("Number must be zero or positive: {}", e)) + } +} + +impl From for Error { + fn from(e: std::array::TryFromSliceError) -> Error { + Error::OutOfSpec(format!("Can't deserialize to parquet native type: {}", e)) + } +} + +/// A specialized `Result` for Parquet errors. 
+pub type Result = std::result::Result; + +impl From for polars_error::PolarsError { + fn from(e: Error) -> polars_error::PolarsError { + polars_error::PolarsError::ComputeError(format!("parquet: {}", e).into()) + } +} + +impl From for Error { + fn from(e: polars_error::PolarsError) -> Error { + Error::OutOfSpec(format!("OOM: {}", e)) + } +} diff --git a/crates/polars-parquet/src/parquet/indexes/index.rs b/crates/polars-parquet/src/parquet/indexes/index.rs new file mode 100644 index 000000000000..08206659da3c --- /dev/null +++ b/crates/polars-parquet/src/parquet/indexes/index.rs @@ -0,0 +1,322 @@ +use std::any::Any; + +use parquet_format_safe::ColumnIndex; + +use crate::parquet::error::Error; +use crate::parquet::parquet_bridge::BoundaryOrder; +use crate::parquet::schema::types::{PhysicalType, PrimitiveType}; +use crate::parquet::types::NativeType; + +/// Trait object representing a [`ColumnIndex`] in Rust's native format. +/// +/// See [`NativeIndex`], [`ByteIndex`] and [`FixedLenByteIndex`] for concrete implementations. +pub trait Index: Send + Sync + std::fmt::Debug { + fn as_any(&self) -> &dyn Any; + + fn physical_type(&self) -> &PhysicalType; +} + +impl PartialEq for dyn Index + '_ { + fn eq(&self, that: &dyn Index) -> bool { + equal(self, that) + } +} + +impl Eq for dyn Index + '_ {} + +fn equal(lhs: &dyn Index, rhs: &dyn Index) -> bool { + if lhs.physical_type() != rhs.physical_type() { + return false; + } + + match lhs.physical_type() { + PhysicalType::Boolean => { + lhs.as_any().downcast_ref::().unwrap() + == rhs.as_any().downcast_ref::().unwrap() + }, + PhysicalType::Int32 => { + lhs.as_any().downcast_ref::>().unwrap() + == rhs.as_any().downcast_ref::>().unwrap() + }, + PhysicalType::Int64 => { + lhs.as_any().downcast_ref::>().unwrap() + == rhs.as_any().downcast_ref::>().unwrap() + }, + PhysicalType::Int96 => { + lhs.as_any() + .downcast_ref::>() + .unwrap() + == rhs + .as_any() + .downcast_ref::>() + .unwrap() + }, + PhysicalType::Float => { + lhs.as_any().downcast_ref::>().unwrap() + == rhs.as_any().downcast_ref::>().unwrap() + }, + PhysicalType::Double => { + lhs.as_any().downcast_ref::>().unwrap() + == rhs.as_any().downcast_ref::>().unwrap() + }, + PhysicalType::ByteArray => { + lhs.as_any().downcast_ref::().unwrap() + == rhs.as_any().downcast_ref::().unwrap() + }, + PhysicalType::FixedLenByteArray(_) => { + lhs.as_any().downcast_ref::().unwrap() + == rhs.as_any().downcast_ref::().unwrap() + }, + } +} + +/// An index of a column of [`NativeType`] physical representation +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct NativeIndex { + /// The primitive type + pub primitive_type: PrimitiveType, + /// The indexes, one item per page + pub indexes: Vec>, + /// the order + pub boundary_order: BoundaryOrder, +} + +impl NativeIndex { + /// Creates a new [`NativeIndex`] + pub(crate) fn try_new( + index: ColumnIndex, + primitive_type: PrimitiveType, + ) -> Result { + let len = index.min_values.len(); + + let null_counts = index + .null_counts + .map(|x| x.into_iter().map(Some).collect::>()) + .unwrap_or_else(|| vec![None; len]); + + let indexes = index + .min_values + .iter() + .zip(index.max_values.into_iter()) + .zip(index.null_pages.into_iter()) + .zip(null_counts.into_iter()) + .map(|(((min, max), is_null), null_count)| { + let (min, max) = if is_null { + (None, None) + } else { + let min = min.as_slice().try_into()?; + let max = max.as_slice().try_into()?; + (Some(T::from_le_bytes(min)), Some(T::from_le_bytes(max))) + }; + Ok(PageIndex { + min, + max, + null_count, + }) + 
}) + .collect::, Error>>()?; + + Ok(Self { + primitive_type, + indexes, + boundary_order: index.boundary_order.try_into()?, + }) + } +} + +/// The index of a page, containing the min and max values of the page. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct PageIndex { + /// The minimum value in the page. It is None when all values are null + pub min: Option, + /// The maximum value in the page. It is None when all values are null + pub max: Option, + /// The number of null values in the page + pub null_count: Option, +} + +impl Index for NativeIndex { + fn as_any(&self) -> &dyn Any { + self + } + + fn physical_type(&self) -> &PhysicalType { + &T::TYPE + } +} + +/// An index of a column of bytes physical type +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ByteIndex { + /// The [`PrimitiveType`]. + pub primitive_type: PrimitiveType, + /// The indexes, one item per page + pub indexes: Vec>>, + pub boundary_order: BoundaryOrder, +} + +impl ByteIndex { + pub(crate) fn try_new( + index: ColumnIndex, + primitive_type: PrimitiveType, + ) -> Result { + let len = index.min_values.len(); + + let null_counts = index + .null_counts + .map(|x| x.into_iter().map(Some).collect::>()) + .unwrap_or_else(|| vec![None; len]); + + let indexes = index + .min_values + .into_iter() + .zip(index.max_values.into_iter()) + .zip(index.null_pages.into_iter()) + .zip(null_counts.into_iter()) + .map(|(((min, max), is_null), null_count)| { + let (min, max) = if is_null { + (None, None) + } else { + (Some(min), Some(max)) + }; + Ok(PageIndex { + min, + max, + null_count, + }) + }) + .collect::, Error>>()?; + + Ok(Self { + primitive_type, + indexes, + boundary_order: index.boundary_order.try_into()?, + }) + } +} + +impl Index for ByteIndex { + fn as_any(&self) -> &dyn Any { + self + } + + fn physical_type(&self) -> &PhysicalType { + &PhysicalType::ByteArray + } +} + +/// An index of a column of fixed len byte physical type +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FixedLenByteIndex { + /// The [`PrimitiveType`]. 
+ pub primitive_type: PrimitiveType, + /// The indexes, one item per page + pub indexes: Vec>>, + pub boundary_order: BoundaryOrder, +} + +impl FixedLenByteIndex { + pub(crate) fn try_new( + index: ColumnIndex, + primitive_type: PrimitiveType, + ) -> Result { + let len = index.min_values.len(); + + let null_counts = index + .null_counts + .map(|x| x.into_iter().map(Some).collect::>()) + .unwrap_or_else(|| vec![None; len]); + + let indexes = index + .min_values + .into_iter() + .zip(index.max_values.into_iter()) + .zip(index.null_pages.into_iter()) + .zip(null_counts.into_iter()) + .map(|(((min, max), is_null), null_count)| { + let (min, max) = if is_null { + (None, None) + } else { + (Some(min), Some(max)) + }; + Ok(PageIndex { + min, + max, + null_count, + }) + }) + .collect::, Error>>()?; + + Ok(Self { + primitive_type, + indexes, + boundary_order: index.boundary_order.try_into()?, + }) + } +} + +impl Index for FixedLenByteIndex { + fn as_any(&self) -> &dyn Any { + self + } + + fn physical_type(&self) -> &PhysicalType { + &self.primitive_type.physical_type + } +} + +/// An index of a column of boolean physical type +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct BooleanIndex { + /// The indexes, one item per page + pub indexes: Vec>, + pub boundary_order: BoundaryOrder, +} + +impl BooleanIndex { + pub(crate) fn try_new(index: ColumnIndex) -> Result { + let len = index.min_values.len(); + + let null_counts = index + .null_counts + .map(|x| x.into_iter().map(Some).collect::>()) + .unwrap_or_else(|| vec![None; len]); + + let indexes = index + .min_values + .into_iter() + .zip(index.max_values.into_iter()) + .zip(index.null_pages.into_iter()) + .zip(null_counts.into_iter()) + .map(|(((min, max), is_null), null_count)| { + let (min, max) = if is_null { + (None, None) + } else { + let min = min[0] == 1; + let max = max[0] == 1; + (Some(min), Some(max)) + }; + Ok(PageIndex { + min, + max, + null_count, + }) + }) + .collect::, Error>>()?; + + Ok(Self { + indexes, + boundary_order: index.boundary_order.try_into()?, + }) + } +} + +impl Index for BooleanIndex { + fn as_any(&self) -> &dyn Any { + self + } + + fn physical_type(&self) -> &PhysicalType { + &PhysicalType::Boolean + } +} diff --git a/crates/polars-parquet/src/parquet/indexes/intervals.rs b/crates/polars-parquet/src/parquet/indexes/intervals.rs new file mode 100644 index 000000000000..f6cbdf9432a3 --- /dev/null +++ b/crates/polars-parquet/src/parquet/indexes/intervals.rs @@ -0,0 +1,137 @@ +use parquet_format_safe::PageLocation; +#[cfg(feature = "serde_types")] +use serde::{Deserialize, Serialize}; + +use crate::parquet::error::Error; + +/// An interval +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub struct Interval { + /// Its start + pub start: usize, + /// Its length + pub length: usize, +} + +impl Interval { + /// Create a new interval + pub fn new(start: usize, length: usize) -> Self { + Self { start, length } + } +} + +/// Returns the set of (row) intervals of the pages. +/// # Errors +/// This function errors if the locations are not castable to `usize` or such that +/// their ranges of row are larger than `num_rows`. 
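+/// # Example
+/// A sketch assuming `locations` holds two [`PageLocation`]s with `first_row_index` 0 and 5
+/// inside a 10-row chunk:
+/// ```ignore
+/// let intervals = compute_page_row_intervals(&locations, 10)?;
+/// // the first page covers rows [0, 5) and the second rows [5, 10)
+/// assert_eq!(intervals, vec![Interval::new(0, 5), Interval::new(5, 5)]);
+/// ```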
+pub fn compute_page_row_intervals( + locations: &[PageLocation], + num_rows: usize, +) -> Result, Error> { + if locations.is_empty() { + return Ok(vec![]); + }; + + let last = (|| { + let start: usize = locations.last().unwrap().first_row_index.try_into()?; + let length = num_rows + .checked_sub(start) + .ok_or_else(|| Error::oos("Page start cannot be smaller than the number of rows"))?; + Result::<_, Error>::Ok(Interval::new(start, length)) + })(); + + let pages_lengths = locations + .windows(2) + .map(|x| { + let start = x[0].first_row_index.try_into()?; + + let length = x[1] + .first_row_index + .checked_sub(x[0].first_row_index) + .ok_or_else(|| Error::oos("Page start cannot be smaller than the number of rows"))? + .try_into()?; + + Ok(Interval::new(start, length)) + }) + .chain(std::iter::once(last)); + pages_lengths.collect() +} + +/// Returns the set of intervals `(start, len)` containing all the +/// selected rows (for a given column) +pub fn compute_rows( + selected: &[bool], + locations: &[PageLocation], + num_rows: usize, +) -> Result, Error> { + let page_intervals = compute_page_row_intervals(locations, num_rows)?; + + Ok(selected + .iter() + .zip(page_intervals.iter().copied()) + .filter_map( + |(&is_selected, page)| { + if is_selected { + Some(page) + } else { + None + } + }, + ) + .collect()) +} + +/// An enum describing a page that was either selected in a filter pushdown or skipped +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub struct FilteredPage { + /// Location of the page in the file + pub start: u64, + pub length: usize, + /// rows to select from the page + pub selected_rows: Vec, + pub num_rows: usize, +} + +fn is_in(probe: Interval, intervals: &[Interval]) -> Vec { + intervals + .iter() + .filter_map(|interval| { + let interval_end = interval.start + interval.length; + let probe_end = probe.start + probe.length; + let overlaps = (probe.start < interval_end) && (probe_end > interval.start); + if overlaps { + let start = interval.start.max(probe.start); + let end = interval_end.min(probe_end); + Some(Interval::new(start - probe.start, end - start)) + } else { + None + } + }) + .collect() +} + +/// Given a set of selected [Interval]s of rows and the set of [`PageLocation`], returns the +/// a set of [`FilteredPage`] with the same number of items as `locations`. 
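+/// # Example
+/// A sketch reusing the layout of the tests below: two 20-byte pages at offsets 100 and 120
+/// covering rows 0..10 and 10..100, with rows 5..10 selected:
+/// ```ignore
+/// let pages = select_pages(&[Interval::new(5, 5)], &locations, 100)?;
+/// // the first page keeps its last 5 rows; the second page is skipped entirely
+/// assert_eq!(pages[0].selected_rows, vec![Interval::new(5, 5)]);
+/// assert!(pages[1].selected_rows.is_empty());
+/// ```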
+pub fn select_pages( + intervals: &[Interval], + locations: &[PageLocation], + num_rows: usize, +) -> Result, Error> { + let page_intervals = compute_page_row_intervals(locations, num_rows)?; + + page_intervals + .into_iter() + .zip(locations.iter()) + .map(|(interval, location)| { + let selected_rows = is_in(interval, intervals); + Ok(FilteredPage { + start: location.offset.try_into()?, + length: location.compressed_page_size.try_into()?, + selected_rows, + num_rows: interval.length, + }) + }) + .collect() +} diff --git a/crates/polars-parquet/src/parquet/indexes/mod.rs b/crates/polars-parquet/src/parquet/indexes/mod.rs new file mode 100644 index 000000000000..f652f8bb4be3 --- /dev/null +++ b/crates/polars-parquet/src/parquet/indexes/mod.rs @@ -0,0 +1,234 @@ +mod index; +mod intervals; + +pub use intervals::{compute_rows, select_pages, FilteredPage, Interval}; + +pub use self::index::{BooleanIndex, ByteIndex, FixedLenByteIndex, Index, NativeIndex, PageIndex}; +pub use crate::parquet::parquet_bridge::BoundaryOrder; +pub use crate::parquet::thrift_format::PageLocation; + +#[cfg(test)] +mod tests { + use super::*; + use crate::parquet::schema::types::{PhysicalType, PrimitiveType}; + + #[test] + fn test_basic() { + let locations = &[PageLocation { + offset: 100, + compressed_page_size: 10, + first_row_index: 0, + }]; + let num_rows = 10; + + let row_intervals = compute_rows(&[true; 1], locations, num_rows).unwrap(); + assert_eq!(row_intervals, vec![Interval::new(0, 10)]) + } + + #[test] + fn test_multiple() { + // two pages + let index = ByteIndex { + primitive_type: PrimitiveType::from_physical("c1".to_string(), PhysicalType::ByteArray), + indexes: vec![ + PageIndex { + min: Some(vec![0]), + max: Some(vec![8, 9]), + null_count: Some(0), + }, + PageIndex { + min: Some(vec![20]), + max: Some(vec![98, 99]), + null_count: Some(0), + }, + ], + boundary_order: Default::default(), + }; + let locations = &[ + PageLocation { + offset: 100, + compressed_page_size: 10, + first_row_index: 0, + }, + PageLocation { + offset: 110, + compressed_page_size: 20, + first_row_index: 5, + }, + ]; + let num_rows = 10; + + // filter of the form `x > "a"` + let selector = |page: &PageIndex>| { + page.max + .as_ref() + .map(|x| x.as_slice()[0] > 97) + .unwrap_or(false) // no max is present => all nulls => not selected + }; + let selected = index.indexes.iter().map(selector).collect::>(); + + let rows = compute_rows(&selected, locations, num_rows).unwrap(); + assert_eq!(rows, vec![Interval::new(5, 5)]); + + let pages = select_pages(&rows, locations, num_rows).unwrap(); + + assert_eq!( + pages, + vec![ + FilteredPage { + start: 100, + length: 10, + selected_rows: vec![], + num_rows: 5 + }, + FilteredPage { + start: 110, + length: 20, + selected_rows: vec![Interval::new(0, 5)], + num_rows: 5 + } + ] + ); + } + + #[test] + fn test_other_column() { + let locations = &[ + PageLocation { + offset: 100, + compressed_page_size: 20, + first_row_index: 0, + }, + PageLocation { + offset: 120, + compressed_page_size: 20, + first_row_index: 10, + }, + ]; + let num_rows = 100; + + let intervals = &[Interval::new(5, 5)]; + + let pages = select_pages(intervals, locations, num_rows).unwrap(); + + assert_eq!( + pages, + vec![ + FilteredPage { + start: 100, + length: 20, + selected_rows: vec![Interval::new(5, 5)], + num_rows: 10, + }, + FilteredPage { + start: 120, + length: 20, + selected_rows: vec![], + num_rows: 90 + }, + ] + ); + } + + #[test] + fn test_other_interval_in_middle() { + let locations = &[ + PageLocation { + offset: 
100, + compressed_page_size: 20, + first_row_index: 0, + }, + PageLocation { + offset: 120, + compressed_page_size: 20, + first_row_index: 10, + }, + PageLocation { + offset: 140, + compressed_page_size: 20, + first_row_index: 100, + }, + ]; + let num_rows = 200; + + // interval partially intersects 2 pages (0 and 1) + let intervals = &[Interval::new(5, 6)]; + + let pages = select_pages(intervals, locations, num_rows).unwrap(); + + assert_eq!( + pages, + vec![ + FilteredPage { + start: 100, + length: 20, + selected_rows: vec![Interval::new(5, 5)], + num_rows: 10, + }, + FilteredPage { + start: 120, + length: 20, + selected_rows: vec![Interval::new(0, 1)], + num_rows: 90, + }, + FilteredPage { + start: 140, + length: 20, + selected_rows: vec![], + num_rows: 100 + }, + ] + ); + } + + #[test] + fn test_other_column2() { + let locations = &[ + PageLocation { + offset: 100, + compressed_page_size: 20, + first_row_index: 0, + }, + PageLocation { + offset: 120, + compressed_page_size: 20, + first_row_index: 10, + }, + PageLocation { + offset: 140, + compressed_page_size: 20, + first_row_index: 100, + }, + ]; + let num_rows = 200; + + // interval partially intersects 1 page (0) + let intervals = &[Interval::new(0, 1)]; + + let pages = select_pages(intervals, locations, num_rows).unwrap(); + + assert_eq!( + pages, + vec![ + FilteredPage { + start: 100, + length: 20, + selected_rows: vec![Interval::new(0, 1)], + num_rows: 10, + }, + FilteredPage { + start: 120, + length: 20, + selected_rows: vec![], + num_rows: 90 + }, + FilteredPage { + start: 140, + length: 20, + selected_rows: vec![], + num_rows: 100 + }, + ] + ); + } +} diff --git a/crates/polars-parquet/src/parquet/metadata/column_chunk_metadata.rs b/crates/polars-parquet/src/parquet/metadata/column_chunk_metadata.rs new file mode 100644 index 000000000000..f5487e323fbf --- /dev/null +++ b/crates/polars-parquet/src/parquet/metadata/column_chunk_metadata.rs @@ -0,0 +1,210 @@ +use std::sync::Arc; + +use parquet_format_safe::{ColumnChunk, ColumnMetaData, Encoding}; + +use super::column_descriptor::ColumnDescriptor; +use crate::parquet::compression::Compression; +use crate::parquet::error::{Error, Result}; +use crate::parquet::schema::types::PhysicalType; +use crate::parquet::statistics::{deserialize_statistics, Statistics}; + +#[cfg(feature = "serde_types")] +mod serde_types { + pub use std::io::Cursor; + + pub use parquet_format_safe::thrift::protocol::{ + TCompactInputProtocol, TCompactOutputProtocol, + }; + pub use serde::de::Error as DeserializeError; + pub use serde::ser::Error as SerializeError; + pub use serde::{Deserialize, Deserializer, Serialize, Serializer}; +} +#[cfg(feature = "serde_types")] +use serde_types::*; + +/// Metadata for a column chunk. +// This contains the `ColumnDescriptor` associated with the chunk so that deserializers have +// access to the descriptor (e.g. physical, converted, logical). 
+#[derive(Debug, Clone)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub struct ColumnChunkMetaData { + #[cfg_attr( + feature = "serde_types", + serde(serialize_with = "serialize_column_chunk") + )] + #[cfg_attr( + feature = "serde_types", + serde(deserialize_with = "deserialize_column_chunk") + )] + column_chunk: ColumnChunk, + column_descr: ColumnDescriptor, +} + +#[cfg(feature = "serde_types")] +fn serialize_column_chunk( + column_chunk: &ColumnChunk, + serializer: S, +) -> std::result::Result +where + S: Serializer, +{ + let mut buf = vec![]; + let cursor = Cursor::new(&mut buf[..]); + let mut protocol = TCompactOutputProtocol::new(cursor); + column_chunk + .write_to_out_protocol(&mut protocol) + .map_err(S::Error::custom)?; + serializer.serialize_bytes(&buf) +} + +#[cfg(feature = "serde_types")] +fn deserialize_column_chunk<'de, D>(deserializer: D) -> std::result::Result +where + D: Deserializer<'de>, +{ + let buf = Vec::::deserialize(deserializer)?; + let mut cursor = Cursor::new(&buf[..]); + let mut protocol = TCompactInputProtocol::new(&mut cursor, usize::MAX); + ColumnChunk::read_from_in_protocol(&mut protocol).map_err(D::Error::custom) +} + +// Represents common operations for a column chunk. +impl ColumnChunkMetaData { + /// Returns a new [`ColumnChunkMetaData`] + pub fn new(column_chunk: ColumnChunk, column_descr: ColumnDescriptor) -> Self { + Self { + column_chunk, + column_descr, + } + } + + /// File where the column chunk is stored. + /// + /// If not set, assumed to belong to the same file as the metadata. + /// This path is relative to the current file. + pub fn file_path(&self) -> &Option { + &self.column_chunk.file_path + } + + /// Byte offset in `file_path()`. + pub fn file_offset(&self) -> i64 { + self.column_chunk.file_offset + } + + /// Returns this column's [`ColumnChunk`] + pub fn column_chunk(&self) -> &ColumnChunk { + &self.column_chunk + } + + /// The column's [`ColumnMetaData`] + pub fn metadata(&self) -> &ColumnMetaData { + self.column_chunk.meta_data.as_ref().unwrap() + } + + /// The [`ColumnDescriptor`] for this column. This descriptor contains the physical and logical type + /// of the pages. + pub fn descriptor(&self) -> &ColumnDescriptor { + &self.column_descr + } + + /// The [`PhysicalType`] of this column. + pub fn physical_type(&self) -> PhysicalType { + self.column_descr.descriptor.primitive_type.physical_type + } + + /// Decodes the raw statistics into [`Statistics`]. + pub fn statistics(&self) -> Option>> { + self.metadata() + .statistics + .as_ref() + .map(|x| deserialize_statistics(x, self.column_descr.descriptor.primitive_type.clone())) + } + + /// Total number of values in this column chunk. Note that this is not necessarily the number + /// of rows. E.g. the (nested) array `[[1, 2], [3]]` has 2 rows and 3 values. + pub fn num_values(&self) -> i64 { + self.metadata().num_values + } + + /// [`Compression`] for this column. + pub fn compression(&self) -> Compression { + self.metadata().codec.try_into().unwrap() + } + + /// Returns the total compressed data size of this column chunk. + pub fn compressed_size(&self) -> i64 { + self.metadata().total_compressed_size + } + + /// Returns the total uncompressed data size of this column chunk. + pub fn uncompressed_size(&self) -> i64 { + self.metadata().total_uncompressed_size + } + + /// Returns the offset for the column data. 
+ pub fn data_page_offset(&self) -> i64 { + self.metadata().data_page_offset + } + + /// Returns `true` if this column chunk contains a index page, `false` otherwise. + pub fn has_index_page(&self) -> bool { + self.metadata().index_page_offset.is_some() + } + + /// Returns the offset for the index page. + pub fn index_page_offset(&self) -> Option { + self.metadata().index_page_offset + } + + /// Returns the offset for the dictionary page, if any. + pub fn dictionary_page_offset(&self) -> Option { + self.metadata().dictionary_page_offset + } + + /// Returns the encoding for this column + pub fn column_encoding(&self) -> &Vec { + &self.metadata().encodings + } + + /// Returns the offset and length in bytes of the column chunk within the file + pub fn byte_range(&self) -> (u64, u64) { + let start = if let Some(dict_page_offset) = self.dictionary_page_offset() { + dict_page_offset as u64 + } else { + self.data_page_offset() as u64 + }; + let length = self.compressed_size() as u64; + // this has been validated in [`try_from_thrift`] + (start, length) + } + + /// Method to convert from Thrift. + pub(crate) fn try_from_thrift( + column_descr: ColumnDescriptor, + column_chunk: ColumnChunk, + ) -> Result { + // validate metadata + if let Some(meta) = &column_chunk.meta_data { + let _: u64 = meta.total_compressed_size.try_into()?; + + if let Some(offset) = meta.dictionary_page_offset { + let _: u64 = offset.try_into()?; + } + let _: u64 = meta.data_page_offset.try_into()?; + + let _: Compression = meta.codec.try_into()?; + } else { + return Err(Error::oos("Column chunk requires metadata")); + } + + Ok(Self { + column_chunk, + column_descr, + }) + } + + /// Method to convert to Thrift. + pub fn into_thrift(self) -> ColumnChunk { + self.column_chunk + } +} diff --git a/crates/polars-parquet/src/parquet/metadata/column_descriptor.rs b/crates/polars-parquet/src/parquet/metadata/column_descriptor.rs new file mode 100644 index 000000000000..2c9a0d1f6e48 --- /dev/null +++ b/crates/polars-parquet/src/parquet/metadata/column_descriptor.rs @@ -0,0 +1,50 @@ +#[cfg(feature = "serde_types")] +use serde::{Deserialize, Serialize}; + +use crate::parquet::schema::types::{ParquetType, PrimitiveType}; + +/// A descriptor of a parquet column. It contains the necessary information to deserialize +/// a parquet column. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub struct Descriptor { + /// The [`PrimitiveType`] of this column + pub primitive_type: PrimitiveType, + + /// The maximum definition level + pub max_def_level: i16, + + /// The maximum repetition level + pub max_rep_level: i16, +} + +/// A descriptor for leaf-level primitive columns. +/// This encapsulates information such as definition and repetition levels and is used to +/// re-assemble nested data. +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub struct ColumnDescriptor { + /// The descriptor this columns' leaf. + pub descriptor: Descriptor, + + /// The path of this column. For instance, "a.b.c.d". + pub path_in_schema: Vec, + + /// The [`ParquetType`] this descriptor is a leaf of + pub base_type: ParquetType, +} + +impl ColumnDescriptor { + /// Creates new descriptor for leaf-level column. 
+ pub fn new( + descriptor: Descriptor, + path_in_schema: Vec, + base_type: ParquetType, + ) -> Self { + Self { + descriptor, + path_in_schema, + base_type, + } + } +} diff --git a/crates/polars-parquet/src/parquet/metadata/column_order.rs b/crates/polars-parquet/src/parquet/metadata/column_order.rs new file mode 100644 index 000000000000..4d66f615bfa0 --- /dev/null +++ b/crates/polars-parquet/src/parquet/metadata/column_order.rs @@ -0,0 +1,30 @@ +#[cfg(feature = "serde_types")] +use serde::{Deserialize, Serialize}; + +use super::sort::SortOrder; + +/// Column order that specifies what method was used to aggregate min/max values for +/// statistics. +/// +/// If column order is undefined, then it is the legacy behaviour and all values should +/// be compared as signed values/bytes. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub enum ColumnOrder { + /// Column uses the order defined by its logical or physical type + /// (if there is no logical type), parquet-format 2.4.0+. + TypeDefinedOrder(SortOrder), + /// Undefined column order, means legacy behaviour before parquet-format 2.4.0. + /// Sort order is always SIGNED. + Undefined, +} + +impl ColumnOrder { + /// Returns sort order associated with this column order. + pub fn sort_order(&self) -> SortOrder { + match *self { + ColumnOrder::TypeDefinedOrder(order) => order, + ColumnOrder::Undefined => SortOrder::Signed, + } + } +} diff --git a/crates/polars-parquet/src/parquet/metadata/file_metadata.rs b/crates/polars-parquet/src/parquet/metadata/file_metadata.rs new file mode 100644 index 000000000000..1c101fa9a561 --- /dev/null +++ b/crates/polars-parquet/src/parquet/metadata/file_metadata.rs @@ -0,0 +1,129 @@ +use parquet_format_safe::ColumnOrder as TColumnOrder; + +use super::column_order::ColumnOrder; +use super::schema_descriptor::SchemaDescriptor; +use super::RowGroupMetaData; +use crate::parquet::error::Error; +use crate::parquet::metadata::get_sort_order; +pub use crate::parquet::thrift_format::KeyValue; + +/// Metadata for a Parquet file. +// This is almost equal to [`parquet_format_safe::FileMetaData`] but contains the descriptors, +// which are crucial to deserialize pages. +#[derive(Debug, Clone)] +pub struct FileMetaData { + /// version of this file. + pub version: i32, + /// number of rows in the file. + pub num_rows: usize, + /// String message for application that wrote this file. + /// + /// This should have the following format: + /// ` version (build )`. + /// + /// ```shell + /// parquet-mr version 1.8.0 (build 0fda28af84b9746396014ad6a415b90592a98b3b) + /// ``` + pub created_by: Option, + /// The row groups of this file + pub row_groups: Vec, + /// key_value_metadata of this file. + pub key_value_metadata: Option>, + /// schema descriptor. + pub schema_descr: SchemaDescriptor, + /// Column (sort) order used for `min` and `max` values of each column in this file. + /// + /// Each column order corresponds to one column, determined by its position in the + /// list, matching the position of the column in the schema. + /// + /// When `None` is returned, there are no column orders available, and each column + /// should be assumed to have undefined (legacy) column order. + pub column_orders: Option>, +} + +impl FileMetaData { + /// Returns the [`SchemaDescriptor`] that describes schema of this file. 
+ pub fn schema(&self) -> &SchemaDescriptor { + &self.schema_descr + } + + /// returns the metadata + pub fn key_value_metadata(&self) -> &Option> { + &self.key_value_metadata + } + + /// Returns column order for `i`th column in this file. + /// If column orders are not available, returns undefined (legacy) column order. + pub fn column_order(&self, i: usize) -> ColumnOrder { + self.column_orders + .as_ref() + .map(|data| data[i]) + .unwrap_or(ColumnOrder::Undefined) + } + + /// Deserializes [`crate::parquet::thrift_format::FileMetaData`] into this struct + pub fn try_from_thrift(metadata: parquet_format_safe::FileMetaData) -> Result { + let schema_descr = SchemaDescriptor::try_from_thrift(&metadata.schema)?; + + let row_groups = metadata + .row_groups + .into_iter() + .map(|rg| RowGroupMetaData::try_from_thrift(&schema_descr, rg)) + .collect::>()?; + + let column_orders = metadata + .column_orders + .map(|orders| parse_column_orders(&orders, &schema_descr)); + + Ok(FileMetaData { + version: metadata.version, + num_rows: metadata.num_rows.try_into()?, + created_by: metadata.created_by, + row_groups, + key_value_metadata: metadata.key_value_metadata, + schema_descr, + column_orders, + }) + } + + /// Serializes itself to thrift's [`parquet_format_safe::FileMetaData`]. + pub fn into_thrift(self) -> parquet_format_safe::FileMetaData { + parquet_format_safe::FileMetaData { + version: self.version, + schema: self.schema_descr.into_thrift(), + num_rows: self.num_rows as i64, + row_groups: self + .row_groups + .into_iter() + .map(|v| v.into_thrift()) + .collect(), + key_value_metadata: self.key_value_metadata, + created_by: self.created_by, + column_orders: None, // todo + encryption_algorithm: None, + footer_signing_key_metadata: None, + } + } +} + +/// Parses [`ColumnOrder`] from Thrift definition. 
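// Illustrative sketch (not part of the patch): before trusting the `min`/`max`
// statistics of column `i` for filtering, a reader can check how that column is
// ordered. Assumes `SortOrder` (from `super::sort`) is in scope; the helper name
// is hypothetical.
fn stats_are_comparable(meta: &FileMetaData, i: usize) -> bool {
    !matches!(meta.column_order(i).sort_order(), SortOrder::Undefined)
}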
+fn parse_column_orders( + orders: &[TColumnOrder], + schema_descr: &SchemaDescriptor, +) -> Vec { + schema_descr + .columns() + .iter() + .zip(orders.iter()) + .map(|(column, order)| match order { + TColumnOrder::TYPEORDER(_) => { + let sort_order = get_sort_order( + &column.descriptor.primitive_type.logical_type, + &column.descriptor.primitive_type.converted_type, + &column.descriptor.primitive_type.physical_type, + ); + ColumnOrder::TypeDefinedOrder(sort_order) + }, + }) + .collect() +} diff --git a/crates/polars-parquet/src/parquet/metadata/mod.rs b/crates/polars-parquet/src/parquet/metadata/mod.rs new file mode 100644 index 000000000000..2dfe81138fdd --- /dev/null +++ b/crates/polars-parquet/src/parquet/metadata/mod.rs @@ -0,0 +1,17 @@ +mod column_chunk_metadata; +mod column_descriptor; +mod column_order; +mod file_metadata; +mod row_metadata; +mod schema_descriptor; +mod sort; + +pub use column_chunk_metadata::ColumnChunkMetaData; +pub use column_descriptor::{ColumnDescriptor, Descriptor}; +pub use column_order::ColumnOrder; +pub use file_metadata::{FileMetaData, KeyValue}; +pub use row_metadata::RowGroupMetaData; +pub use schema_descriptor::SchemaDescriptor; +pub use sort::*; + +pub use crate::parquet::thrift_format::FileMetaData as ThriftFileMetaData; diff --git a/crates/polars-parquet/src/parquet/metadata/row_metadata.rs b/crates/polars-parquet/src/parquet/metadata/row_metadata.rs new file mode 100644 index 000000000000..60137ca6167a --- /dev/null +++ b/crates/polars-parquet/src/parquet/metadata/row_metadata.rs @@ -0,0 +1,103 @@ +use parquet_format_safe::RowGroup; +#[cfg(feature = "serde_types")] +use serde::{Deserialize, Serialize}; + +use super::column_chunk_metadata::ColumnChunkMetaData; +use super::schema_descriptor::SchemaDescriptor; +use crate::parquet::error::{Error, Result}; +use crate::parquet::write::ColumnOffsetsMetadata; + +/// Metadata for a row group. +#[derive(Debug, Clone)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub struct RowGroupMetaData { + columns: Vec, + num_rows: usize, + total_byte_size: usize, +} + +impl RowGroupMetaData { + /// Create a new [`RowGroupMetaData`] + pub fn new( + columns: Vec, + num_rows: usize, + total_byte_size: usize, + ) -> RowGroupMetaData { + Self { + columns, + num_rows, + total_byte_size, + } + } + + /// Returns slice of column chunk metadata. + pub fn columns(&self) -> &[ColumnChunkMetaData] { + &self.columns + } + + /// Number of rows in this row group. + pub fn num_rows(&self) -> usize { + self.num_rows + } + + /// Total byte size of all uncompressed column data in this row group. + pub fn total_byte_size(&self) -> usize { + self.total_byte_size + } + + /// Total size of all compressed column data in this row group. + pub fn compressed_size(&self) -> usize { + self.columns + .iter() + .map(|c| c.compressed_size() as usize) + .sum::() + } + + /// Method to convert from Thrift. 
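// Illustrative sketch (not part of the patch): aggregating row-group counters from a
// parsed footer. Assumes `FileMetaData` is in scope; the helper name is hypothetical.
fn file_totals(meta: &FileMetaData) -> (usize, usize) {
    let rows: usize = meta.row_groups.iter().map(|rg| rg.num_rows()).sum();
    let compressed: usize = meta.row_groups.iter().map(|rg| rg.compressed_size()).sum();
    (rows, compressed)
}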
+ pub(crate) fn try_from_thrift( + schema_descr: &SchemaDescriptor, + rg: RowGroup, + ) -> Result { + if schema_descr.columns().len() != rg.columns.len() { + return Err(Error::oos(format!("The number of columns in the row group ({}) must be equal to the number of columns in the schema ({})", rg.columns.len(), schema_descr.columns().len()))); + } + let total_byte_size = rg.total_byte_size.try_into()?; + let num_rows = rg.num_rows.try_into()?; + let columns = rg + .columns + .into_iter() + .zip(schema_descr.columns()) + .map(|(column_chunk, descriptor)| { + ColumnChunkMetaData::try_from_thrift(descriptor.clone(), column_chunk) + }) + .collect::>>()?; + + Ok(RowGroupMetaData { + columns, + num_rows, + total_byte_size, + }) + } + + /// Method to convert to Thrift. + pub(crate) fn into_thrift(self) -> RowGroup { + let file_offset = self + .columns + .iter() + .map(|c| { + ColumnOffsetsMetadata::from_column_chunk_metadata(c).calc_row_group_file_offset() + }) + .next() + .unwrap_or(None); + let total_compressed_size = Some(self.compressed_size() as i64); + RowGroup { + columns: self.columns.into_iter().map(|v| v.into_thrift()).collect(), + total_byte_size: self.total_byte_size as i64, + num_rows: self.num_rows as i64, + sorting_columns: None, + file_offset, + total_compressed_size, + ordinal: None, + } + } +} diff --git a/crates/polars-parquet/src/parquet/metadata/schema_descriptor.rs b/crates/polars-parquet/src/parquet/metadata/schema_descriptor.rs new file mode 100644 index 000000000000..a5a3a7b10735 --- /dev/null +++ b/crates/polars-parquet/src/parquet/metadata/schema_descriptor.rs @@ -0,0 +1,141 @@ +use parquet_format_safe::SchemaElement; +#[cfg(feature = "serde_types")] +use serde::{Deserialize, Serialize}; + +use super::column_descriptor::{ColumnDescriptor, Descriptor}; +use crate::parquet::error::{Error, Result}; +use crate::parquet::schema::io_message::from_message; +use crate::parquet::schema::types::{FieldInfo, ParquetType}; +use crate::parquet::schema::Repetition; + +/// A schema descriptor. This encapsulates the top-level schemas for all the columns, +/// as well as all descriptors for all the primitive columns. +#[derive(Debug, Clone)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub struct SchemaDescriptor { + name: String, + // The top-level schema (the "message" type). + fields: Vec, + + // All the descriptors for primitive columns in this schema, constructed from + // `schema` in DFS order. + leaves: Vec, +} + +impl SchemaDescriptor { + /// Creates new schema descriptor from Parquet schema. + pub fn new(name: String, fields: Vec) -> Self { + let mut leaves = vec![]; + for f in &fields { + let mut path = vec![]; + build_tree(f, f, 0, 0, &mut leaves, &mut path); + } + + Self { + name, + fields, + leaves, + } + } + + /// The [`ColumnDescriptor`] (leafs) of this schema. + /// + /// Note that, for nested fields, this may contain more entries than the number of fields + /// in the file - e.g. a struct field may have two columns. + pub fn columns(&self) -> &[ColumnDescriptor] { + &self.leaves + } + + /// The schemas' name. + pub fn name(&self) -> &str { + &self.name + } + + /// The schemas' fields. 
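// Illustrative sketch (not part of the patch): for nested schemas the leaf columns
// (`columns()`) outnumber the top-level fields (`fields()`), and the definition /
// repetition levels are derived by `build_tree` below. The message string follows the
// textual schema format accepted by `try_from_message`; the function name is hypothetical.
fn nested_schema_example() -> Result<()> {
    let schema = SchemaDescriptor::try_from_message(
        "message spark_schema { OPTIONAL group a { OPTIONAL INT32 x; REQUIRED BYTE_ARRAY y; } }",
    )?;
    assert_eq!(schema.fields().len(), 1); // one top-level field: the group `a`
    assert_eq!(schema.columns().len(), 2); // two leaves: `a.x` and `a.y`
    assert_eq!(schema.columns()[0].descriptor.max_def_level, 2); // optional inside optional
    assert_eq!(schema.columns()[0].descriptor.max_rep_level, 0); // nothing is repeated
    Ok(())
}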
+ pub fn fields(&self) -> &[ParquetType] { + &self.fields + } + + pub(crate) fn into_thrift(self) -> Vec { + ParquetType::GroupType { + field_info: FieldInfo { + name: self.name, + repetition: Repetition::Optional, + id: None, + }, + logical_type: None, + converted_type: None, + fields: self.fields, + } + .to_thrift() + } + + fn try_from_type(type_: ParquetType) -> Result { + match type_ { + ParquetType::GroupType { + field_info, fields, .. + } => Ok(Self::new(field_info.name, fields)), + _ => Err(Error::oos("The parquet schema MUST be a group type")), + } + } + + pub(crate) fn try_from_thrift(elements: &[SchemaElement]) -> Result { + let schema = ParquetType::try_from_thrift(elements)?; + Self::try_from_type(schema) + } + + /// Creates a schema from + pub fn try_from_message(message: &str) -> Result { + let schema = from_message(message)?; + Self::try_from_type(schema) + } +} + +fn build_tree<'a>( + tp: &'a ParquetType, + base_tp: &ParquetType, + mut max_rep_level: i16, + mut max_def_level: i16, + leaves: &mut Vec, + path_so_far: &mut Vec<&'a str>, +) { + path_so_far.push(tp.name()); + match tp.get_field_info().repetition { + Repetition::Optional => { + max_def_level += 1; + }, + Repetition::Repeated => { + max_def_level += 1; + max_rep_level += 1; + }, + _ => {}, + } + + match tp { + ParquetType::PrimitiveType(p) => { + let path_in_schema = path_so_far.iter().copied().map(String::from).collect(); + leaves.push(ColumnDescriptor::new( + Descriptor { + primitive_type: p.clone(), + max_def_level, + max_rep_level, + }, + path_in_schema, + base_tp.clone(), + )); + }, + ParquetType::GroupType { ref fields, .. } => { + for f in fields { + build_tree( + f, + base_tp, + max_rep_level, + max_def_level, + leaves, + path_so_far, + ); + path_so_far.pop(); + } + }, + } +} diff --git a/crates/polars-parquet/src/parquet/metadata/sort.rs b/crates/polars-parquet/src/parquet/metadata/sort.rs new file mode 100644 index 000000000000..93aac06605b6 --- /dev/null +++ b/crates/polars-parquet/src/parquet/metadata/sort.rs @@ -0,0 +1,94 @@ +#[cfg(feature = "serde_types")] +use serde::{Deserialize, Serialize}; + +use crate::parquet::schema::types::{ + IntegerType, PhysicalType, PrimitiveConvertedType, PrimitiveLogicalType, +}; + +/// Sort order for page and column statistics. +/// +/// Types are associated with sort orders and column stats are aggregated using a sort +/// order, and a sort order should be considered when comparing values with statistics +/// min/max. +/// +/// See reference in +/// +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub enum SortOrder { + /// Signed (either value or legacy byte-wise) comparison. + Signed, + /// Unsigned (depending on physical type either value or byte-wise) comparison. + Unsigned, + /// Comparison is undefined. + Undefined, +} + +/// Returns sort order for a physical/logical type. +pub fn get_sort_order( + logical_type: &Option, + converted_type: &Option, + physical_type: &PhysicalType, +) -> SortOrder { + if let Some(logical_type) = logical_type { + return get_logical_sort_order(logical_type); + }; + if let Some(converted_type) = converted_type { + return get_converted_sort_order(converted_type); + }; + get_physical_sort_order(physical_type) +} + +fn get_logical_sort_order(logical_type: &PrimitiveLogicalType) -> SortOrder { + // TODO: Should this take converted and logical type, for compatibility? 
+ use PrimitiveLogicalType::*; + match logical_type { + String | Enum | Json | Bson => SortOrder::Unsigned, + Integer(t) => match t { + IntegerType::Int8 | IntegerType::Int16 | IntegerType::Int32 | IntegerType::Int64 => { + SortOrder::Signed + }, + _ => SortOrder::Unsigned, + }, + Decimal(_, _) => SortOrder::Signed, + Date => SortOrder::Signed, + Time { .. } => SortOrder::Signed, + Timestamp { .. } => SortOrder::Signed, + Unknown => SortOrder::Undefined, + Uuid => SortOrder::Unsigned, + } +} + +fn get_converted_sort_order(converted_type: &PrimitiveConvertedType) -> SortOrder { + use PrimitiveConvertedType::*; + match converted_type { + // Unsigned byte-wise comparison. + Utf8 | Json | Bson | Enum => SortOrder::Unsigned, + Int8 | Int16 | Int32 | Int64 => SortOrder::Signed, + Uint8 | Uint16 | Uint32 | Uint64 => SortOrder::Unsigned, + // Signed comparison of the represented value. + Decimal(_, _) => SortOrder::Signed, + Date => SortOrder::Signed, + TimeMillis | TimeMicros | TimestampMillis | TimestampMicros => SortOrder::Signed, + Interval => SortOrder::Undefined, + } +} + +fn get_physical_sort_order(physical_type: &PhysicalType) -> SortOrder { + use PhysicalType::*; + match physical_type { + // Order: false, true + Boolean => SortOrder::Unsigned, + Int32 | Int64 => SortOrder::Signed, + Int96 => SortOrder::Undefined, + // Notes to remember when comparing float/double values: + // If the min is a NaN, it should be ignored. + // If the max is a NaN, it should be ignored. + // If the min is +0, the row group may contain -0 values as well. + // If the max is -0, the row group may contain +0 values as well. + // When looking for NaN values, min and max should be ignored. + Float | Double => SortOrder::Signed, + // Unsigned byte-wise comparison + ByteArray | FixedLenByteArray(_) => SortOrder::Unsigned, + } +} diff --git a/crates/polars-parquet/src/parquet/mod.rs b/crates/polars-parquet/src/parquet/mod.rs new file mode 100644 index 000000000000..05166f650e2f --- /dev/null +++ b/crates/polars-parquet/src/parquet/mod.rs @@ -0,0 +1,37 @@ +#[macro_use] +pub mod error; +#[cfg(feature = "bloom_filter")] +pub mod bloom_filter; +pub mod compression; +pub mod deserialize; +pub mod encoding; +pub mod indexes; +pub mod metadata; +pub mod page; +mod parquet_bridge; +pub mod read; +pub mod schema; +pub mod statistics; +pub mod types; +pub mod write; + +use parquet_format_safe as thrift_format; +pub use streaming_decompression::{fallible_streaming_iterator, FallibleStreamingIterator}; + +const HEADER_SIZE: u64 = PARQUET_MAGIC.len() as u64; +const FOOTER_SIZE: u64 = 8; +const PARQUET_MAGIC: [u8; 4] = [b'P', b'A', b'R', b'1']; + +/// The number of bytes read at the end of the parquet file on first read +const DEFAULT_FOOTER_READ_SIZE: u64 = 64 * 1024; + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + pub fn get_path() -> PathBuf { + let dir = env!("CARGO_MANIFEST_DIR"); + + PathBuf::from(dir).join("testing/parquet-testing/data") + } +} diff --git a/crates/polars-parquet/src/parquet/page/mod.rs b/crates/polars-parquet/src/parquet/page/mod.rs new file mode 100644 index 000000000000..0fcf3a635469 --- /dev/null +++ b/crates/polars-parquet/src/parquet/page/mod.rs @@ -0,0 +1,428 @@ +use std::sync::Arc; + +use crate::parquet::compression::Compression; +use crate::parquet::encoding::{get_length, Encoding}; +use crate::parquet::error::{Error, Result}; +use crate::parquet::indexes::Interval; +use crate::parquet::metadata::Descriptor; +pub use crate::parquet::parquet_bridge::{DataPageHeaderExt, PageType}; +use 
crate::parquet::statistics::{deserialize_statistics, Statistics}; +pub use crate::parquet::thrift_format::{ + DataPageHeader as DataPageHeaderV1, DataPageHeaderV2, PageHeader as ParquetPageHeader, +}; + +/// A [`CompressedDataPage`] is compressed, encoded representation of a Parquet data page. +/// It holds actual data and thus cloning it is expensive. +#[derive(Debug)] +pub struct CompressedDataPage { + pub(crate) header: DataPageHeader, + pub(crate) buffer: Vec, + pub(crate) compression: Compression, + uncompressed_page_size: usize, + pub(crate) descriptor: Descriptor, + + // The offset and length in rows + pub(crate) selected_rows: Option>, +} + +impl CompressedDataPage { + /// Returns a new [`CompressedDataPage`]. + pub fn new( + header: DataPageHeader, + buffer: Vec, + compression: Compression, + uncompressed_page_size: usize, + descriptor: Descriptor, + rows: Option, + ) -> Self { + Self::new_read( + header, + buffer, + compression, + uncompressed_page_size, + descriptor, + rows.map(|x| vec![Interval::new(0, x)]), + ) + } + + /// Returns a new [`CompressedDataPage`]. + pub(crate) fn new_read( + header: DataPageHeader, + buffer: Vec, + compression: Compression, + uncompressed_page_size: usize, + descriptor: Descriptor, + selected_rows: Option>, + ) -> Self { + Self { + header, + buffer, + compression, + uncompressed_page_size, + descriptor, + selected_rows, + } + } + + pub fn header(&self) -> &DataPageHeader { + &self.header + } + + pub fn uncompressed_size(&self) -> usize { + self.uncompressed_page_size + } + + pub fn compressed_size(&self) -> usize { + self.buffer.len() + } + + /// The compression of the data in this page. + /// Note that what is compressed in a page depends on its version: + /// in V1, the whole data (`[repetition levels][definition levels][values]`) is compressed; in V2 only the values are compressed. + pub fn compression(&self) -> Compression { + self.compression + } + + /// the rows to be selected by this page. + /// When `None`, all rows are to be considered. + pub fn selected_rows(&self) -> Option<&[Interval]> { + self.selected_rows.as_deref() + } + + pub fn num_values(&self) -> usize { + self.header.num_values() + } + + /// Decodes the raw statistics into a statistics + pub fn statistics(&self) -> Option>> { + match &self.header { + DataPageHeader::V1(d) => d + .statistics + .as_ref() + .map(|x| deserialize_statistics(x, self.descriptor.primitive_type.clone())), + DataPageHeader::V2(d) => d + .statistics + .as_ref() + .map(|x| deserialize_statistics(x, self.descriptor.primitive_type.clone())), + } + } + + #[inline] + pub fn select_rows(&mut self, selected_rows: Vec) { + self.selected_rows = Some(selected_rows); + } +} + +#[derive(Debug, Clone)] +pub enum DataPageHeader { + V1(DataPageHeaderV1), + V2(DataPageHeaderV2), +} + +impl DataPageHeader { + pub fn num_values(&self) -> usize { + match &self { + DataPageHeader::V1(d) => d.num_values as usize, + DataPageHeader::V2(d) => d.num_values as usize, + } + } +} + +/// A [`DataPage`] is an uncompressed, encoded representation of a Parquet data page. It holds actual data +/// and thus cloning it is expensive. 
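// Illustrative sketch (not part of the patch): restricting a compressed page to a row
// range before it is decompressed and decoded. An `Interval` is a (start, length) pair,
// so the call below keeps rows 10..20; the helper name is hypothetical.
fn keep_rows_10_to_20(page: &mut CompressedDataPage) {
    page.select_rows(vec![Interval::new(10, 10)]);
    assert_eq!(page.selected_rows(), Some(&[Interval::new(10, 10)][..]));
}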
+#[derive(Debug, Clone)] +pub struct DataPage { + pub(super) header: DataPageHeader, + pub(super) buffer: Vec, + pub descriptor: Descriptor, + pub selected_rows: Option>, +} + +impl DataPage { + pub fn new( + header: DataPageHeader, + buffer: Vec, + descriptor: Descriptor, + rows: Option, + ) -> Self { + Self::new_read( + header, + buffer, + descriptor, + rows.map(|x| vec![Interval::new(0, x)]), + ) + } + + pub(crate) fn new_read( + header: DataPageHeader, + buffer: Vec, + descriptor: Descriptor, + selected_rows: Option>, + ) -> Self { + Self { + header, + buffer, + descriptor, + selected_rows, + } + } + + pub fn header(&self) -> &DataPageHeader { + &self.header + } + + pub fn buffer(&self) -> &[u8] { + &self.buffer + } + + /// the rows to be selected by this page. + /// When `None`, all rows are to be considered. + pub fn selected_rows(&self) -> Option<&[Interval]> { + self.selected_rows.as_deref() + } + + /// Returns a mutable reference to the internal buffer. + /// Useful to recover the buffer after the page has been decoded. + pub fn buffer_mut(&mut self) -> &mut Vec { + &mut self.buffer + } + + pub fn num_values(&self) -> usize { + self.header.num_values() + } + + pub fn encoding(&self) -> Encoding { + match &self.header { + DataPageHeader::V1(d) => d.encoding(), + DataPageHeader::V2(d) => d.encoding(), + } + } + + pub fn definition_level_encoding(&self) -> Encoding { + match &self.header { + DataPageHeader::V1(d) => d.definition_level_encoding(), + DataPageHeader::V2(_) => Encoding::Rle, + } + } + + pub fn repetition_level_encoding(&self) -> Encoding { + match &self.header { + DataPageHeader::V1(d) => d.repetition_level_encoding(), + DataPageHeader::V2(_) => Encoding::Rle, + } + } + + /// Decodes the raw statistics into a statistics + pub fn statistics(&self) -> Option>> { + match &self.header { + DataPageHeader::V1(d) => d + .statistics + .as_ref() + .map(|x| deserialize_statistics(x, self.descriptor.primitive_type.clone())), + DataPageHeader::V2(d) => d + .statistics + .as_ref() + .map(|x| deserialize_statistics(x, self.descriptor.primitive_type.clone())), + } + } +} + +/// A [`Page`] is an uncompressed, encoded representation of a Parquet page. It may hold actual data +/// and thus cloning it may be expensive. +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +pub enum Page { + /// A [`DataPage`] + Data(DataPage), + /// A [`DictPage`] + Dict(DictPage), +} + +impl Page { + pub(crate) fn buffer(&mut self) -> &mut Vec { + match self { + Self::Data(page) => &mut page.buffer, + Self::Dict(page) => &mut page.buffer, + } + } +} + +/// A [`CompressedPage`] is a compressed, encoded representation of a Parquet page. It holds actual data +/// and thus cloning it is expensive. 
+#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +pub enum CompressedPage { + Data(CompressedDataPage), + Dict(CompressedDictPage), +} + +impl CompressedPage { + pub(crate) fn buffer(&mut self) -> &mut Vec { + match self { + CompressedPage::Data(page) => &mut page.buffer, + CompressedPage::Dict(page) => &mut page.buffer, + } + } + + pub(crate) fn compression(&self) -> Compression { + match self { + CompressedPage::Data(page) => page.compression(), + CompressedPage::Dict(page) => page.compression(), + } + } + + pub(crate) fn num_values(&self) -> usize { + match self { + CompressedPage::Data(page) => page.num_values(), + CompressedPage::Dict(_) => 0, + } + } + + pub(crate) fn selected_rows(&self) -> Option<&[Interval]> { + match self { + CompressedPage::Data(page) => page.selected_rows(), + CompressedPage::Dict(_) => None, + } + } + + pub(crate) fn uncompressed_size(&self) -> usize { + match self { + CompressedPage::Data(page) => page.uncompressed_page_size, + CompressedPage::Dict(page) => page.uncompressed_page_size, + } + } +} + +/// An uncompressed, encoded dictionary page. +#[derive(Debug)] +pub struct DictPage { + pub buffer: Vec, + pub num_values: usize, + pub is_sorted: bool, +} + +impl DictPage { + pub fn new(buffer: Vec, num_values: usize, is_sorted: bool) -> Self { + Self { + buffer, + num_values, + is_sorted, + } + } +} + +/// A compressed, encoded dictionary page. +#[derive(Debug)] +pub struct CompressedDictPage { + pub(crate) buffer: Vec, + compression: Compression, + pub(crate) num_values: usize, + pub(crate) uncompressed_page_size: usize, + pub is_sorted: bool, +} + +impl CompressedDictPage { + pub fn new( + buffer: Vec, + compression: Compression, + uncompressed_page_size: usize, + num_values: usize, + is_sorted: bool, + ) -> Self { + Self { + buffer, + compression, + uncompressed_page_size, + num_values, + is_sorted, + } + } + + /// The compression of the data in this page. + pub fn compression(&self) -> Compression { + self.compression + } +} + +/// Splits the page buffer into 3 slices corresponding to (encoded rep levels, encoded def levels, encoded values) for v1 pages. 
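// Illustrative worked example (not part of the patch): a v1 page buffer is laid out as
// `[4-byte LE length][rep levels][4-byte LE length][def levels][values]`, which
// `split_buffer_v1` (defined just below) slices apart. The function name is hypothetical.
fn split_v1_layout() -> Result<()> {
    let buffer: &[u8] = &[
        1, 0, 0, 0, 0xAA, // rep levels: declared length 1, one byte of data
        2, 0, 0, 0, 0xBB, 0xCC, // def levels: declared length 2, two bytes of data
        0xDD, 0xEE, // whatever remains is the encoded values
    ];
    let (rep, def, values) = split_buffer_v1(buffer, true, true)?;
    assert_eq!(rep, &[0xAA][..]);
    assert_eq!(def, &[0xBB, 0xCC][..]);
    assert_eq!(values, &[0xDD, 0xEE][..]);
    Ok(())
}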
+#[inline] +pub fn split_buffer_v1( + buffer: &[u8], + has_rep: bool, + has_def: bool, +) -> Result<(&[u8], &[u8], &[u8])> { + let (rep, buffer) = if has_rep { + let level_buffer_length = get_length(buffer).ok_or_else(|| { + Error::oos("The number of bytes declared in v1 rep levels is higher than the page size") + })?; + ( + buffer.get(4..4 + level_buffer_length).ok_or_else(|| { + Error::oos( + "The number of bytes declared in v1 rep levels is higher than the page size", + ) + })?, + buffer.get(4 + level_buffer_length..).ok_or_else(|| { + Error::oos( + "The number of bytes declared in v1 rep levels is higher than the page size", + ) + })?, + ) + } else { + (&[] as &[u8], buffer) + }; + + let (def, buffer) = if has_def { + let level_buffer_length = get_length(buffer).ok_or_else(|| { + Error::oos("The number of bytes declared in v1 rep levels is higher than the page size") + })?; + ( + buffer.get(4..4 + level_buffer_length).ok_or_else(|| { + Error::oos( + "The number of bytes declared in v1 def levels is higher than the page size", + ) + })?, + buffer.get(4 + level_buffer_length..).ok_or_else(|| { + Error::oos( + "The number of bytes declared in v1 def levels is higher than the page size", + ) + })?, + ) + } else { + (&[] as &[u8], buffer) + }; + + Ok((rep, def, buffer)) +} + +/// Splits the page buffer into 3 slices corresponding to (encoded rep levels, encoded def levels, encoded values) for v2 pages. +pub fn split_buffer_v2( + buffer: &[u8], + rep_level_buffer_length: usize, + def_level_buffer_length: usize, +) -> Result<(&[u8], &[u8], &[u8])> { + Ok(( + &buffer[..rep_level_buffer_length], + &buffer[rep_level_buffer_length..rep_level_buffer_length + def_level_buffer_length], + &buffer[rep_level_buffer_length + def_level_buffer_length..], + )) +} + +/// Splits the page buffer into 3 slices corresponding to (encoded rep levels, encoded def levels, encoded values). +pub fn split_buffer(page: &DataPage) -> Result<(&[u8], &[u8], &[u8])> { + match page.header() { + DataPageHeader::V1(_) => split_buffer_v1( + page.buffer(), + page.descriptor.max_rep_level > 0, + page.descriptor.max_def_level > 0, + ), + DataPageHeader::V2(header) => { + let def_level_buffer_length: usize = header.definition_levels_byte_length.try_into()?; + let rep_level_buffer_length: usize = header.repetition_levels_byte_length.try_into()?; + split_buffer_v2( + page.buffer(), + rep_level_buffer_length, + def_level_buffer_length, + ) + }, + } +} diff --git a/crates/polars-parquet/src/parquet/parquet_bridge.rs b/crates/polars-parquet/src/parquet/parquet_bridge.rs new file mode 100644 index 000000000000..eec75e4994ca --- /dev/null +++ b/crates/polars-parquet/src/parquet/parquet_bridge.rs @@ -0,0 +1,704 @@ +// Bridges structs from thrift-generated code to rust enums. 
+use std::convert::TryFrom; + +#[cfg(feature = "serde_types")] +use serde::{Deserialize, Serialize}; + +use super::thrift_format::{ + BoundaryOrder as ParquetBoundaryOrder, CompressionCodec, DataPageHeader, DataPageHeaderV2, + DecimalType, Encoding as ParquetEncoding, FieldRepetitionType, IntType, + LogicalType as ParquetLogicalType, PageType as ParquetPageType, TimeType, + TimeUnit as ParquetTimeUnit, TimestampType, +}; +use crate::parquet::error::Error; + +/// The repetition of a parquet field +#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub enum Repetition { + /// When the field has no null values + Required, + /// When the field may have null values + Optional, + /// When the field may be repeated (list field) + Repeated, +} + +impl TryFrom for Repetition { + type Error = Error; + + fn try_from(repetition: FieldRepetitionType) -> Result { + Ok(match repetition { + FieldRepetitionType::REQUIRED => Repetition::Required, + FieldRepetitionType::OPTIONAL => Repetition::Optional, + FieldRepetitionType::REPEATED => Repetition::Repeated, + _ => return Err(Error::oos("Thrift out of range")), + }) + } +} + +impl From for FieldRepetitionType { + fn from(repetition: Repetition) -> Self { + match repetition { + Repetition::Required => FieldRepetitionType::REQUIRED, + Repetition::Optional => FieldRepetitionType::OPTIONAL, + Repetition::Repeated => FieldRepetitionType::REPEATED, + } + } +} + +#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub enum Compression { + Uncompressed, + Snappy, + Gzip, + Lzo, + Brotli, + Lz4, + Zstd, + Lz4Raw, +} + +impl TryFrom for Compression { + type Error = Error; + + fn try_from(codec: CompressionCodec) -> Result { + Ok(match codec { + CompressionCodec::UNCOMPRESSED => Compression::Uncompressed, + CompressionCodec::SNAPPY => Compression::Snappy, + CompressionCodec::GZIP => Compression::Gzip, + CompressionCodec::LZO => Compression::Lzo, + CompressionCodec::BROTLI => Compression::Brotli, + CompressionCodec::LZ4 => Compression::Lz4, + CompressionCodec::ZSTD => Compression::Zstd, + CompressionCodec::LZ4_RAW => Compression::Lz4Raw, + _ => return Err(Error::oos("Thrift out of range")), + }) + } +} + +impl From for CompressionCodec { + fn from(codec: Compression) -> Self { + match codec { + Compression::Uncompressed => CompressionCodec::UNCOMPRESSED, + Compression::Snappy => CompressionCodec::SNAPPY, + Compression::Gzip => CompressionCodec::GZIP, + Compression::Lzo => CompressionCodec::LZO, + Compression::Brotli => CompressionCodec::BROTLI, + Compression::Lz4 => CompressionCodec::LZ4, + Compression::Zstd => CompressionCodec::ZSTD, + Compression::Lz4Raw => CompressionCodec::LZ4_RAW, + } + } +} + +/// Defines the compression settings for writing a parquet file. +/// +/// If None is provided as a compression setting, then the default compression level is used. 
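// Illustrative sketch (not part of the patch): a writer picks a `CompressionOptions`
// (defined just below); `None` keeps the codec's default level, and only the codec,
// not the level, survives the later conversion into the thrift `CompressionCodec`
// stored in the footer. The function name is hypothetical.
fn pick_write_codec(fast: bool) -> CompressionOptions {
    if fast {
        CompressionOptions::Snappy
    } else {
        // defaults to gzip level 6, per `GzipLevel::default`
        CompressionOptions::Gzip(None)
    }
}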
+#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] +pub enum CompressionOptions { + Uncompressed, + Snappy, + Gzip(Option), + Lzo, + Brotli(Option), + Lz4, + Zstd(Option), + Lz4Raw, +} + +impl From for Compression { + fn from(value: CompressionOptions) -> Self { + match value { + CompressionOptions::Uncompressed => Compression::Uncompressed, + CompressionOptions::Snappy => Compression::Snappy, + CompressionOptions::Gzip(_) => Compression::Gzip, + CompressionOptions::Lzo => Compression::Lzo, + CompressionOptions::Brotli(_) => Compression::Brotli, + CompressionOptions::Lz4 => Compression::Lz4, + CompressionOptions::Zstd(_) => Compression::Zstd, + CompressionOptions::Lz4Raw => Compression::Lz4Raw, + } + } +} + +impl From for CompressionCodec { + fn from(codec: CompressionOptions) -> Self { + match codec { + CompressionOptions::Uncompressed => CompressionCodec::UNCOMPRESSED, + CompressionOptions::Snappy => CompressionCodec::SNAPPY, + CompressionOptions::Gzip(_) => CompressionCodec::GZIP, + CompressionOptions::Lzo => CompressionCodec::LZO, + CompressionOptions::Brotli(_) => CompressionCodec::BROTLI, + CompressionOptions::Lz4 => CompressionCodec::LZ4, + CompressionOptions::Zstd(_) => CompressionCodec::ZSTD, + CompressionOptions::Lz4Raw => CompressionCodec::LZ4_RAW, + } + } +} + +/// Defines valid compression levels. +pub(crate) trait CompressionLevel { + const MINIMUM_LEVEL: T; + const MAXIMUM_LEVEL: T; + + /// Tests if the provided compression level is valid. + fn is_valid_level(level: T) -> Result<(), Error> { + let compression_range = Self::MINIMUM_LEVEL..=Self::MAXIMUM_LEVEL; + if compression_range.contains(&level) { + Ok(()) + } else { + Err(Error::InvalidParameter(format!( + "valid compression range {}..={} exceeded.", + compression_range.start(), + compression_range.end() + ))) + } + } +} + +/// Represents a valid brotli compression level. +#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] +pub struct BrotliLevel(u32); + +impl Default for BrotliLevel { + fn default() -> Self { + Self(1) + } +} + +impl CompressionLevel for BrotliLevel { + const MINIMUM_LEVEL: u32 = 0; + const MAXIMUM_LEVEL: u32 = 11; +} + +impl BrotliLevel { + /// Attempts to create a brotli compression level. + /// + /// Compression levels must be valid. + pub fn try_new(level: u32) -> Result { + Self::is_valid_level(level).map(|_| Self(level)) + } + + /// Returns the compression level. + pub fn compression_level(&self) -> u32 { + self.0 + } +} + +/// Represents a valid gzip compression level. +#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] +pub struct GzipLevel(u8); + +impl Default for GzipLevel { + fn default() -> Self { + // The default as of miniz_oxide 0.5.1 is 6 for compression level + // (miniz_oxide::deflate::CompressionLevel::DefaultLevel) + Self(6) + } +} + +impl CompressionLevel for GzipLevel { + const MINIMUM_LEVEL: u8 = 0; + const MAXIMUM_LEVEL: u8 = 10; +} + +impl GzipLevel { + /// Attempts to create a gzip compression level. + /// + /// Compression levels must be valid (i.e. be acceptable for [`flate2::Compression`]). + pub fn try_new(level: u8) -> Result { + Self::is_valid_level(level).map(|_| Self(level)) + } + + /// Returns the compression level. + pub fn compression_level(&self) -> u8 { + self.0 + } +} + +#[cfg(feature = "gzip")] +impl From for flate2::Compression { + fn from(level: GzipLevel) -> Self { + Self::new(level.compression_level() as u32) + } +} + +/// Represents a valid zstd compression level. 
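// Illustrative sketch (not part of the patch): compression levels are validated at
// construction, so an out-of-range zstd level is rejected before any page is written.
// `ZstdLevel` is defined just below; the function name is hypothetical.
fn zstd_options(level: i32) -> Result<CompressionOptions, Error> {
    Ok(CompressionOptions::Zstd(Some(ZstdLevel::try_new(level)?)))
}
// `zstd_options(3)` succeeds, while `zstd_options(42)` returns `Error::InvalidParameter`
// because the accepted range is 1..=22.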
+#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] +pub struct ZstdLevel(i32); + +impl CompressionLevel for ZstdLevel { + // zstd binds to C, and hence zstd::compression_level_range() is not const as this calls the + // underlying C library. + const MINIMUM_LEVEL: i32 = 1; + const MAXIMUM_LEVEL: i32 = 22; +} + +impl ZstdLevel { + /// Attempts to create a zstd compression level from a given compression level. + /// + /// Compression levels must be valid (i.e. be acceptable for [`zstd::compression_level_range`]). + pub fn try_new(level: i32) -> Result { + Self::is_valid_level(level).map(|_| Self(level)) + } + + /// Returns the compression level. + pub fn compression_level(&self) -> i32 { + self.0 + } +} + +#[cfg(feature = "zstd")] +impl Default for ZstdLevel { + fn default() -> Self { + Self(zstd::DEFAULT_COMPRESSION_LEVEL) + } +} + +#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] +pub enum PageType { + DataPage, + DataPageV2, + DictionaryPage, +} + +impl TryFrom for PageType { + type Error = Error; + + fn try_from(type_: ParquetPageType) -> Result { + Ok(match type_ { + ParquetPageType::DATA_PAGE => PageType::DataPage, + ParquetPageType::DATA_PAGE_V2 => PageType::DataPageV2, + ParquetPageType::DICTIONARY_PAGE => PageType::DictionaryPage, + _ => return Err(Error::oos("Thrift out of range")), + }) + } +} + +impl From for ParquetPageType { + fn from(type_: PageType) -> Self { + match type_ { + PageType::DataPage => ParquetPageType::DATA_PAGE, + PageType::DataPageV2 => ParquetPageType::DATA_PAGE_V2, + PageType::DictionaryPage => ParquetPageType::DICTIONARY_PAGE, + } + } +} + +#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] +pub enum Encoding { + /// Default encoding. + /// BOOLEAN - 1 bit per value. 0 is false; 1 is true. + /// INT32 - 4 bytes per value. Stored as little-endian. + /// INT64 - 8 bytes per value. Stored as little-endian. + /// FLOAT - 4 bytes per value. IEEE. Stored as little-endian. + /// DOUBLE - 8 bytes per value. IEEE. Stored as little-endian. + /// BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. + /// FIXED_LEN_BYTE_ARRAY - Just the bytes. + Plain, + /// Deprecated: Dictionary encoding. The values in the dictionary are encoded in the + /// plain type. + /// in a data page use RLE_DICTIONARY instead. + /// in a Dictionary page use PLAIN instead + PlainDictionary, + /// Group packed run length encoding. Usable for definition/repetition levels + /// encoding and Booleans (on one bit: 0 is false; 1 is true.) + Rle, + /// Bit packed encoding. This can only be used if the data has a known max + /// width. Usable for definition/repetition levels encoding. + BitPacked, + /// Delta encoding for integers. This can be used for int columns and works best + /// on sorted data + DeltaBinaryPacked, + /// Encoding for byte arrays to separate the length values and the data. The lengths + /// are encoded using DELTA_BINARY_PACKED + DeltaLengthByteArray, + /// Incremental-encoded byte array. Prefix lengths are encoded using DELTA_BINARY_PACKED. + /// Suffixes are stored as delta length byte arrays. + DeltaByteArray, + /// Dictionary encoding: the ids are encoded using the RLE encoding + RleDictionary, + /// Encoding for floating-point data. + /// K byte-streams are created where K is the size in bytes of the data type. + /// The individual bytes of an FP value are scattered to the corresponding stream and + /// the streams are concatenated. + /// This itself does not reduce the size of the data but can lead to better compression + /// afterwards. 
+ ByteStreamSplit, +} + +impl TryFrom for Encoding { + type Error = Error; + + fn try_from(encoding: ParquetEncoding) -> Result { + Ok(match encoding { + ParquetEncoding::PLAIN => Encoding::Plain, + ParquetEncoding::PLAIN_DICTIONARY => Encoding::PlainDictionary, + ParquetEncoding::RLE => Encoding::Rle, + ParquetEncoding::BIT_PACKED => Encoding::BitPacked, + ParquetEncoding::DELTA_BINARY_PACKED => Encoding::DeltaBinaryPacked, + ParquetEncoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DeltaLengthByteArray, + ParquetEncoding::DELTA_BYTE_ARRAY => Encoding::DeltaByteArray, + ParquetEncoding::RLE_DICTIONARY => Encoding::RleDictionary, + ParquetEncoding::BYTE_STREAM_SPLIT => Encoding::ByteStreamSplit, + _ => return Err(Error::oos("Thrift out of range")), + }) + } +} + +impl From for ParquetEncoding { + fn from(encoding: Encoding) -> Self { + match encoding { + Encoding::Plain => ParquetEncoding::PLAIN, + Encoding::PlainDictionary => ParquetEncoding::PLAIN_DICTIONARY, + Encoding::Rle => ParquetEncoding::RLE, + Encoding::BitPacked => ParquetEncoding::BIT_PACKED, + Encoding::DeltaBinaryPacked => ParquetEncoding::DELTA_BINARY_PACKED, + Encoding::DeltaLengthByteArray => ParquetEncoding::DELTA_LENGTH_BYTE_ARRAY, + Encoding::DeltaByteArray => ParquetEncoding::DELTA_BYTE_ARRAY, + Encoding::RleDictionary => ParquetEncoding::RLE_DICTIONARY, + Encoding::ByteStreamSplit => ParquetEncoding::BYTE_STREAM_SPLIT, + } + } +} + +/// Enum to annotate whether lists of min/max elements inside ColumnIndex +/// are ordered and if so, in which direction. +#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)] +pub enum BoundaryOrder { + Unordered, + Ascending, + Descending, +} + +impl Default for BoundaryOrder { + fn default() -> Self { + Self::Unordered + } +} + +impl TryFrom for BoundaryOrder { + type Error = Error; + + fn try_from(encoding: ParquetBoundaryOrder) -> Result { + Ok(match encoding { + ParquetBoundaryOrder::UNORDERED => BoundaryOrder::Unordered, + ParquetBoundaryOrder::ASCENDING => BoundaryOrder::Ascending, + ParquetBoundaryOrder::DESCENDING => BoundaryOrder::Descending, + _ => return Err(Error::oos("BoundaryOrder Thrift value out of range")), + }) + } +} + +impl From for ParquetBoundaryOrder { + fn from(encoding: BoundaryOrder) -> Self { + match encoding { + BoundaryOrder::Unordered => ParquetBoundaryOrder::UNORDERED, + BoundaryOrder::Ascending => ParquetBoundaryOrder::ASCENDING, + BoundaryOrder::Descending => ParquetBoundaryOrder::DESCENDING, + } + } +} + +pub trait DataPageHeaderExt { + fn encoding(&self) -> Encoding; + fn repetition_level_encoding(&self) -> Encoding; + fn definition_level_encoding(&self) -> Encoding; +} + +impl DataPageHeaderExt for DataPageHeader { + fn encoding(&self) -> Encoding { + self.encoding.try_into().unwrap() + } + + fn repetition_level_encoding(&self) -> Encoding { + self.repetition_level_encoding.try_into().unwrap() + } + + fn definition_level_encoding(&self) -> Encoding { + self.definition_level_encoding.try_into().unwrap() + } +} + +impl DataPageHeaderExt for DataPageHeaderV2 { + fn encoding(&self) -> Encoding { + self.encoding.try_into().unwrap() + } + + fn repetition_level_encoding(&self) -> Encoding { + Encoding::Rle + } + + fn definition_level_encoding(&self) -> Encoding { + Encoding::Rle + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub enum TimeUnit { + Milliseconds, + Microseconds, + Nanoseconds, +} + +impl From for TimeUnit { + fn from(encoding: ParquetTimeUnit) -> Self { + match 
encoding { + ParquetTimeUnit::MILLIS(_) => TimeUnit::Milliseconds, + ParquetTimeUnit::MICROS(_) => TimeUnit::Microseconds, + ParquetTimeUnit::NANOS(_) => TimeUnit::Nanoseconds, + } + } +} + +impl From for ParquetTimeUnit { + fn from(unit: TimeUnit) -> Self { + match unit { + TimeUnit::Milliseconds => ParquetTimeUnit::MILLIS(Default::default()), + TimeUnit::Microseconds => ParquetTimeUnit::MICROS(Default::default()), + TimeUnit::Nanoseconds => ParquetTimeUnit::NANOS(Default::default()), + } + } +} + +/// Enum of all valid logical integer types +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub enum IntegerType { + Int8, + Int16, + Int32, + Int64, + UInt8, + UInt16, + UInt32, + UInt64, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub enum PrimitiveLogicalType { + String, + Enum, + Decimal(usize, usize), + Date, + Time { + unit: TimeUnit, + is_adjusted_to_utc: bool, + }, + Timestamp { + unit: TimeUnit, + is_adjusted_to_utc: bool, + }, + Integer(IntegerType), + Unknown, + Json, + Bson, + Uuid, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub enum GroupLogicalType { + Map, + List, +} + +impl From for ParquetLogicalType { + fn from(type_: GroupLogicalType) -> Self { + match type_ { + GroupLogicalType::Map => ParquetLogicalType::MAP(Default::default()), + GroupLogicalType::List => ParquetLogicalType::LIST(Default::default()), + } + } +} + +impl From<(i32, bool)> for IntegerType { + fn from((bit_width, is_signed): (i32, bool)) -> Self { + match (bit_width, is_signed) { + (8, true) => IntegerType::Int8, + (16, true) => IntegerType::Int16, + (32, true) => IntegerType::Int32, + (64, true) => IntegerType::Int64, + (8, false) => IntegerType::UInt8, + (16, false) => IntegerType::UInt16, + (32, false) => IntegerType::UInt32, + (64, false) => IntegerType::UInt64, + // The above are the only possible annotations for parquet's int32. 
Anything else + // is a deviation to the parquet specification and we ignore + _ => IntegerType::Int32, + } + } +} + +impl From for (usize, bool) { + fn from(type_: IntegerType) -> (usize, bool) { + match type_ { + IntegerType::Int8 => (8, true), + IntegerType::Int16 => (16, true), + IntegerType::Int32 => (32, true), + IntegerType::Int64 => (64, true), + IntegerType::UInt8 => (8, false), + IntegerType::UInt16 => (16, false), + IntegerType::UInt32 => (32, false), + IntegerType::UInt64 => (64, false), + } + } +} + +impl TryFrom for PrimitiveLogicalType { + type Error = Error; + + fn try_from(type_: ParquetLogicalType) -> Result { + Ok(match type_ { + ParquetLogicalType::STRING(_) => PrimitiveLogicalType::String, + ParquetLogicalType::ENUM(_) => PrimitiveLogicalType::Enum, + ParquetLogicalType::DECIMAL(decimal) => PrimitiveLogicalType::Decimal( + decimal.precision.try_into()?, + decimal.scale.try_into()?, + ), + ParquetLogicalType::DATE(_) => PrimitiveLogicalType::Date, + ParquetLogicalType::TIME(time) => PrimitiveLogicalType::Time { + unit: time.unit.into(), + is_adjusted_to_utc: time.is_adjusted_to_u_t_c, + }, + ParquetLogicalType::TIMESTAMP(time) => PrimitiveLogicalType::Timestamp { + unit: time.unit.into(), + is_adjusted_to_utc: time.is_adjusted_to_u_t_c, + }, + ParquetLogicalType::INTEGER(int) => { + PrimitiveLogicalType::Integer((int.bit_width as i32, int.is_signed).into()) + }, + ParquetLogicalType::UNKNOWN(_) => PrimitiveLogicalType::Unknown, + ParquetLogicalType::JSON(_) => PrimitiveLogicalType::Json, + ParquetLogicalType::BSON(_) => PrimitiveLogicalType::Bson, + ParquetLogicalType::UUID(_) => PrimitiveLogicalType::Uuid, + _ => return Err(Error::oos("LogicalType value out of range")), + }) + } +} + +impl TryFrom for GroupLogicalType { + type Error = Error; + + fn try_from(type_: ParquetLogicalType) -> Result { + Ok(match type_ { + ParquetLogicalType::LIST(_) => GroupLogicalType::List, + ParquetLogicalType::MAP(_) => GroupLogicalType::Map, + _ => return Err(Error::oos("LogicalType value out of range")), + }) + } +} + +impl From for ParquetLogicalType { + fn from(type_: PrimitiveLogicalType) -> Self { + match type_ { + PrimitiveLogicalType::String => ParquetLogicalType::STRING(Default::default()), + PrimitiveLogicalType::Enum => ParquetLogicalType::ENUM(Default::default()), + PrimitiveLogicalType::Decimal(precision, scale) => { + ParquetLogicalType::DECIMAL(DecimalType { + precision: precision as i32, + scale: scale as i32, + }) + }, + PrimitiveLogicalType::Date => ParquetLogicalType::DATE(Default::default()), + PrimitiveLogicalType::Time { + unit, + is_adjusted_to_utc, + } => ParquetLogicalType::TIME(TimeType { + unit: unit.into(), + is_adjusted_to_u_t_c: is_adjusted_to_utc, + }), + PrimitiveLogicalType::Timestamp { + unit, + is_adjusted_to_utc, + } => ParquetLogicalType::TIMESTAMP(TimestampType { + unit: unit.into(), + is_adjusted_to_u_t_c: is_adjusted_to_utc, + }), + PrimitiveLogicalType::Integer(integer) => { + let (bit_width, is_signed) = integer.into(); + ParquetLogicalType::INTEGER(IntType { + bit_width: bit_width as i8, + is_signed, + }) + }, + PrimitiveLogicalType::Unknown => ParquetLogicalType::UNKNOWN(Default::default()), + PrimitiveLogicalType::Json => ParquetLogicalType::JSON(Default::default()), + PrimitiveLogicalType::Bson => ParquetLogicalType::BSON(Default::default()), + PrimitiveLogicalType::Uuid => ParquetLogicalType::UUID(Default::default()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn round_trip_primitive() -> Result<(), Error> { + use 
PrimitiveLogicalType::*; + let a = vec![ + String, + Enum, + Decimal(3, 1), + Date, + Time { + unit: TimeUnit::Milliseconds, + is_adjusted_to_utc: true, + }, + Timestamp { + unit: TimeUnit::Milliseconds, + is_adjusted_to_utc: true, + }, + Integer(IntegerType::Int16), + Unknown, + Json, + Bson, + Uuid, + ]; + for a in a { + let c: ParquetLogicalType = a.into(); + let e: PrimitiveLogicalType = c.try_into()?; + assert_eq!(e, a); + } + Ok(()) + } + + #[test] + fn round_trip_encoding() -> Result<(), Error> { + use Encoding::*; + let a = vec![ + Plain, + PlainDictionary, + Rle, + BitPacked, + DeltaBinaryPacked, + DeltaLengthByteArray, + DeltaByteArray, + RleDictionary, + ByteStreamSplit, + ]; + for a in a { + let c: ParquetEncoding = a.into(); + let e: Encoding = c.try_into()?; + assert_eq!(e, a); + } + Ok(()) + } + + #[test] + fn round_compression() -> Result<(), Error> { + use Compression::*; + let a = vec![Uncompressed, Snappy, Gzip, Lzo, Brotli, Lz4, Zstd, Lz4Raw]; + for a in a { + let c: CompressionCodec = a.into(); + let e: Compression = c.try_into()?; + assert_eq!(e, a); + } + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/read/column/mod.rs b/crates/polars-parquet/src/parquet/read/column/mod.rs new file mode 100644 index 000000000000..76e6809ac767 --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/column/mod.rs @@ -0,0 +1,204 @@ +use std::io::{Read, Seek}; +use std::vec::IntoIter; + +use super::{get_field_columns, get_page_iterator, PageFilter, PageReader}; +use crate::parquet::error::Error; +use crate::parquet::metadata::{ColumnChunkMetaData, RowGroupMetaData}; +use crate::parquet::page::CompressedPage; +use crate::parquet::schema::types::ParquetType; + +#[cfg(feature = "async")] +#[cfg_attr(docsrs, doc(cfg(feature = "async")))] +mod stream; + +/// Returns a [`ColumnIterator`] of column chunks corresponding to `field`. +/// +/// Contrarily to [`get_page_iterator`] that returns a single iterator of pages, this iterator +/// iterates over columns, one by one, and returns a [`PageReader`] per column. +/// For primitive fields (e.g. `i64`), [`ColumnIterator`] yields exactly one column. +/// For complex fields, it yields multiple columns. +/// `max_page_size` is the maximum number of bytes allowed. +pub fn get_column_iterator( + reader: R, + row_group: &RowGroupMetaData, + field_name: &str, + page_filter: Option, + scratch: Vec, + max_page_size: usize, +) -> ColumnIterator { + let columns = get_field_columns(row_group.columns(), field_name) + .cloned() + .collect::>(); + + ColumnIterator::new(reader, columns, page_filter, scratch, max_page_size) +} + +/// State of [`MutStreamingIterator`]. +#[derive(Debug)] +pub enum State { + /// Iterator still has elements + Some(T), + /// Iterator finished + Finished(Vec), +} + +/// A special kind of fallible streaming iterator where `advance` consumes the iterator. +pub trait MutStreamingIterator: Sized { + type Item; + type Error; + + fn advance(self) -> std::result::Result, Self::Error>; + fn get(&mut self) -> Option<&mut Self::Item>; +} + +/// A [`MutStreamingIterator`] that reads column chunks one by one, +/// returning a [`PageReader`] per column. 
+pub struct ColumnIterator { + reader: Option, + columns: Vec, + page_filter: Option, + current: Option<(PageReader, ColumnChunkMetaData)>, + scratch: Vec, + max_page_size: usize, +} + +impl ColumnIterator { + /// Returns a new [`ColumnIterator`] + /// `max_page_size` is the maximum allowed page size + pub fn new( + reader: R, + mut columns: Vec, + page_filter: Option, + scratch: Vec, + max_page_size: usize, + ) -> Self { + columns.reverse(); + Self { + reader: Some(reader), + scratch, + columns, + page_filter, + current: None, + max_page_size, + } + } +} + +impl MutStreamingIterator for ColumnIterator { + type Item = (PageReader, ColumnChunkMetaData); + type Error = Error; + + fn advance(mut self) -> Result, Error> { + let (reader, scratch) = if let Some((iter, _)) = self.current { + iter.into_inner() + } else { + (self.reader.unwrap(), self.scratch) + }; + if self.columns.is_empty() { + return Ok(State::Finished(scratch)); + }; + let column = self.columns.pop().unwrap(); + + let iter = get_page_iterator( + &column, + reader, + self.page_filter.clone(), + scratch, + self.max_page_size, + )?; + let current = Some((iter, column)); + Ok(State::Some(Self { + reader: None, + columns: self.columns, + page_filter: self.page_filter, + current, + scratch: vec![], + max_page_size: self.max_page_size, + })) + } + + fn get(&mut self) -> Option<&mut Self::Item> { + self.current.as_mut() + } +} + +/// A [`MutStreamingIterator`] of pre-read column chunks +#[derive(Debug)] +pub struct ReadColumnIterator { + field: ParquetType, + chunks: Vec<(Vec>, ColumnChunkMetaData)>, + current: Option<(IntoIter>, ColumnChunkMetaData)>, +} + +impl ReadColumnIterator { + /// Returns a new [`ReadColumnIterator`] + pub fn new( + field: ParquetType, + chunks: Vec<(Vec>, ColumnChunkMetaData)>, + ) -> Self { + Self { + field, + chunks, + current: None, + } + } +} + +impl MutStreamingIterator for ReadColumnIterator { + type Item = (IntoIter>, ColumnChunkMetaData); + type Error = Error; + + fn advance(mut self) -> Result, Error> { + if self.chunks.is_empty() { + return Ok(State::Finished(vec![])); + } + self.current = self + .chunks + .pop() + .map(|(pages, meta)| (pages.into_iter(), meta)); + Ok(State::Some(Self { + field: self.field, + chunks: self.chunks, + current: self.current, + })) + } + + fn get(&mut self) -> Option<&mut Self::Item> { + self.current.as_mut() + } +} + +/// Reads all columns that are part of the parquet field `field_name` +/// # Implementation +/// This operation is IO-bounded `O(C)` where C is the number of columns associated to +/// the field (one for non-nested types) +/// It reads the columns sequentially. Use [`read_column`] to fork this operation to multiple +/// readers. +pub fn read_columns<'a, R: Read + Seek>( + reader: &mut R, + columns: &'a [ColumnChunkMetaData], + field_name: &'a str, +) -> Result)>, Error> { + get_field_columns(columns, field_name) + .map(|column| read_column(reader, column).map(|c| (column, c))) + .collect() +} + +/// Reads a column chunk into memory +/// This operation is IO-bounded and allocates the column's `compressed_size`. 
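// Illustrative sketch (not part of the patch): draining a `ColumnIterator`, e.g. one built
// with `get_column_iterator` above. `advance` consumes the iterator, so it is re-bound on
// every step until `State::Finished` hands back the scratch buffer. The function name is
// hypothetical; a real consumer would also drain `_pages` (a `PageReader`).
fn count_field_values<R: std::io::Read + std::io::Seek>(
    mut iter: ColumnIterator<R>,
) -> Result<i64, Error> {
    let mut values = 0;
    loop {
        match iter.advance()? {
            State::Some(mut next) => {
                if let Some((_pages, column)) = next.get() {
                    values += column.num_values();
                }
                iter = next;
            },
            State::Finished(_scratch) => return Ok(values),
        }
    }
}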
+pub fn read_column(reader: &mut R, column: &ColumnChunkMetaData) -> Result, Error> +where + R: Read + Seek, +{ + let (start, length) = column.byte_range(); + reader.seek(std::io::SeekFrom::Start(start))?; + + let mut chunk = vec![]; + chunk.try_reserve(length as usize)?; + reader.by_ref().take(length).read_to_end(&mut chunk)?; + Ok(chunk) +} + +#[cfg(feature = "async")] +#[cfg_attr(docsrs, doc(cfg(feature = "async")))] +pub use stream::{read_column_async, read_columns_async}; diff --git a/crates/polars-parquet/src/parquet/read/column/stream.rs b/crates/polars-parquet/src/parquet/read/column/stream.rs new file mode 100644 index 000000000000..eac4fd497fd6 --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/column/stream.rs @@ -0,0 +1,51 @@ +use futures::future::{try_join_all, BoxFuture}; +use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt}; + +use crate::parquet::error::Error; +use crate::parquet::metadata::ColumnChunkMetaData; +use crate::parquet::read::get_field_columns; + +/// Reads a single column chunk into memory asynchronously +pub async fn read_column_async<'b, R, F>( + factory: F, + meta: &ColumnChunkMetaData, +) -> Result, Error> +where + R: AsyncRead + AsyncSeek + Send + Unpin, + F: Fn() -> BoxFuture<'b, std::io::Result>, +{ + let mut reader = factory().await?; + let (start, length) = meta.byte_range(); + reader.seek(std::io::SeekFrom::Start(start)).await?; + + let mut chunk = vec![]; + chunk.try_reserve(length as usize)?; + reader.take(length).read_to_end(&mut chunk).await?; + Result::Ok(chunk) +} + +/// Reads all columns that are part of the parquet field `field_name` +/// # Implementation +/// This operation is IO-bounded `O(C)` where C is the number of columns associated to +/// the field (one for non-nested types) +/// +/// It does so asynchronously via a single `join_all` over all the necessary columns for +/// `field_name`. +pub async fn read_columns_async< + 'a, + 'b, + R: AsyncRead + AsyncSeek + Send + Unpin, + F: Fn() -> BoxFuture<'b, std::io::Result> + Clone, +>( + factory: F, + columns: &'a [ColumnChunkMetaData], + field_name: &'a str, +) -> Result)>, Error> { + let fields = get_field_columns(columns, field_name).collect::>(); + let futures = fields + .iter() + .map(|meta| async { read_column_async(factory.clone(), meta).await }); + + let columns = try_join_all(futures).await?; + Ok(fields.into_iter().zip(columns).collect()) +} diff --git a/crates/polars-parquet/src/parquet/read/compression.rs b/crates/polars-parquet/src/parquet/read/compression.rs new file mode 100644 index 000000000000..fbe2ef938f82 --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/compression.rs @@ -0,0 +1,286 @@ +use parquet_format_safe::DataPageHeaderV2; +use streaming_decompression; + +use super::page::PageIterator; +use crate::parquet::compression::{self, Compression}; +use crate::parquet::error::{Error, Result}; +use crate::parquet::page::{CompressedPage, DataPage, DataPageHeader, DictPage, Page}; +use crate::parquet::FallibleStreamingIterator; + +fn decompress_v1(compressed: &[u8], compression: Compression, buffer: &mut [u8]) -> Result<()> { + compression::decompress(compression, compressed, buffer) +} + +fn decompress_v2( + compressed: &[u8], + page_header: &DataPageHeaderV2, + compression: Compression, + buffer: &mut [u8], +) -> Result<()> { + // When processing data page v2, depending on enabled compression for the + // page, we should account for uncompressed data ('offset') of + // repetition and definition levels. 
+ // + // We always use 0 offset for other pages other than v2, `true` flag means + // that compression will be applied if decompressor is defined + let offset = (page_header.definition_levels_byte_length + + page_header.repetition_levels_byte_length) as usize; + // When is_compressed flag is missing the page is considered compressed + let can_decompress = page_header.is_compressed.unwrap_or(true); + + if can_decompress { + if offset > buffer.len() || offset > compressed.len() { + return Err(Error::OutOfSpec( + "V2 Page Header reported incorrect offset to compressed data".to_string(), + )); + } + + (buffer[..offset]).copy_from_slice(&compressed[..offset]); + + compression::decompress(compression, &compressed[offset..], &mut buffer[offset..])?; + } else { + if buffer.len() != compressed.len() { + return Err(Error::OutOfSpec( + "V2 Page Header reported incorrect decompressed size".to_string(), + )); + } + buffer.copy_from_slice(compressed); + } + Ok(()) +} + +/// decompresses a [`CompressedDataPage`] into `buffer`. +/// If the page is un-compressed, `buffer` is swapped instead. +/// Returns whether the page was decompressed. +pub fn decompress_buffer( + compressed_page: &mut CompressedPage, + buffer: &mut Vec, +) -> Result { + if compressed_page.compression() != Compression::Uncompressed { + // prepare the compression buffer + let read_size = compressed_page.uncompressed_size(); + + if read_size > buffer.capacity() { + // dealloc and ignore region, replacing it by a new region. + // This won't reallocate - it frees and calls `alloc_zeroed` + *buffer = vec![0; read_size]; + } else if read_size > buffer.len() { + // fill what we need with zeros so that we can use them in `Read`. + // This won't reallocate + buffer.resize(read_size, 0); + } else { + buffer.truncate(read_size); + } + match compressed_page { + CompressedPage::Data(compressed_page) => match compressed_page.header() { + DataPageHeader::V1(_) => { + decompress_v1(&compressed_page.buffer, compressed_page.compression, buffer)? + }, + DataPageHeader::V2(header) => decompress_v2( + &compressed_page.buffer, + header, + compressed_page.compression, + buffer, + )?, + }, + CompressedPage::Dict(page) => decompress_v1(&page.buffer, page.compression(), buffer)?, + } + Ok(true) + } else { + // page.buffer is already decompressed => swap it with `buffer`, making `page.buffer` the + // decompression buffer and `buffer` the decompressed buffer + std::mem::swap(compressed_page.buffer(), buffer); + Ok(false) + } +} + +fn create_page(compressed_page: CompressedPage, buffer: Vec) -> Page { + match compressed_page { + CompressedPage::Data(page) => Page::Data(DataPage::new_read( + page.header, + buffer, + page.descriptor, + page.selected_rows, + )), + CompressedPage::Dict(page) => Page::Dict(DictPage { + buffer, + num_values: page.num_values, + is_sorted: page.is_sorted, + }), + } +} + +/// Decompresses the page, using `buffer` for decompression. +/// If `page.buffer.len() == 0`, there was no decompression and the buffer was moved. +/// Else, decompression took place. 
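+/// # Example
+/// A sketch, assuming `compressed_page` was obtained from a [`PageReader`]:
+/// ```ignore
+/// let mut scratch = vec![];
+/// let page: Page = decompress(compressed_page, &mut scratch)?;
+/// // the (possibly decompressed) bytes now live in `page`; `scratch` is taken and left empty
+/// ```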
+pub fn decompress(mut compressed_page: CompressedPage, buffer: &mut Vec) -> Result { + decompress_buffer(&mut compressed_page, buffer)?; + Ok(create_page(compressed_page, std::mem::take(buffer))) +} + +fn decompress_reuse( + mut compressed_page: CompressedPage, + iterator: &mut P, + buffer: &mut Vec, +) -> Result<(Page, bool)> { + let was_decompressed = decompress_buffer(&mut compressed_page, buffer)?; + + if was_decompressed { + iterator.swap_buffer(compressed_page.buffer()) + }; + + let new_page = create_page(compressed_page, std::mem::take(buffer)); + + Ok((new_page, was_decompressed)) +} + +/// Decompressor that allows re-using the page buffer of [`PageIterator`]. +/// # Implementation +/// The implementation depends on whether a page is compressed or not. +/// > `PageReader(a)`, `CompressedPage(b)`, `Decompressor(c)`, `DecompressedPage(d)` +/// ### un-compressed pages: +/// > page iter: `a` is swapped with `b` +/// > decompress iter: `b` is swapped with `d`, `b` is swapped with `a` +/// therefore: +/// * `PageReader` has its buffer back +/// * `Decompressor`'s buffer is un-used +/// * `DecompressedPage` has the same data as `CompressedPage` had +/// ### compressed pages: +/// > page iter: `a` is swapped with `b` +/// > decompress iter: +/// > * `b` is decompressed into `c` +/// > * `b` is swapped with `a` +/// > * `c` is moved to `d` +/// > * (next iteration): `d` is moved to `c` +/// therefore, while the page is available: +/// * `PageReader` has its buffer back +/// * `Decompressor`'s buffer empty +/// * `DecompressedPage` has the decompressed buffer +/// after the page is used: +/// * `PageReader` has its buffer back +/// * `Decompressor` has its buffer back +/// * `DecompressedPage` has an empty buffer +pub struct Decompressor { + iter: P, + buffer: Vec, + current: Option, + was_decompressed: bool, +} + +impl Decompressor

{ + /// Creates a new [`Decompressor`]. + pub fn new(iter: P, buffer: Vec) -> Self { + Self { + iter, + buffer, + current: None, + was_decompressed: false, + } + } + + /// Returns two buffers: the first buffer corresponds to the page buffer, + /// the second to the decompression buffer. + pub fn into_buffers(mut self) -> (Vec, Vec) { + let mut page_buffer = vec![]; + self.iter.swap_buffer(&mut page_buffer); + (page_buffer, self.buffer) + } +} + +impl FallibleStreamingIterator for Decompressor

{ + type Item = Page; + type Error = Error; + + fn advance(&mut self) -> Result<()> { + if let Some(page) = self.current.as_mut() { + if self.was_decompressed { + self.buffer = std::mem::take(page.buffer()); + } else { + self.iter.swap_buffer(page.buffer()); + } + } + + let next = self + .iter + .next() + .map(|x| { + x.and_then(|x| { + let (page, was_decompressed) = + decompress_reuse(x, &mut self.iter, &mut self.buffer)?; + self.was_decompressed = was_decompressed; + Ok(page) + }) + }) + .transpose()?; + self.current = next; + Ok(()) + } + + fn get(&self) -> Option<&Self::Item> { + self.current.as_ref() + } +} + +type _Decompressor = streaming_decompression::Decompressor< + CompressedPage, + Page, + fn(CompressedPage, &mut Vec) -> Result, + Error, + I, +>; + +impl streaming_decompression::Compressed for CompressedPage { + #[inline] + fn is_compressed(&self) -> bool { + self.compression() != Compression::Uncompressed + } +} + +impl streaming_decompression::Decompressed for Page { + #[inline] + fn buffer_mut(&mut self) -> &mut Vec { + self.buffer() + } +} + +/// A [`FallibleStreamingIterator`] that decompresses [`CompressedPage`] into [`DataPage`]. +/// # Implementation +/// This decompressor uses an internal [`Vec`] to perform decompressions which +/// is re-used across pages, so that a single allocation is required. +/// If the pages are not compressed, the internal buffer is not used. +pub struct BasicDecompressor>> { + iter: _Decompressor, +} + +impl BasicDecompressor +where + I: Iterator>, +{ + /// Returns a new [`BasicDecompressor`]. + pub fn new(iter: I, buffer: Vec) -> Self { + Self { + iter: _Decompressor::new(iter, buffer, decompress), + } + } + + /// Returns its internal buffer, consuming itself. + pub fn into_inner(self) -> Vec { + self.iter.into_inner() + } +} + +impl FallibleStreamingIterator for BasicDecompressor +where + I: Iterator>, +{ + type Item = Page; + type Error = Error; + + fn advance(&mut self) -> Result<()> { + self.iter.advance() + } + + fn get(&self) -> Option<&Self::Item> { + self.iter.get() + } +} diff --git a/crates/polars-parquet/src/parquet/read/indexes/deserialize.rs b/crates/polars-parquet/src/parquet/read/indexes/deserialize.rs new file mode 100644 index 000000000000..1570605f83d0 --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/indexes/deserialize.rs @@ -0,0 +1,27 @@ +use parquet_format_safe::thrift::protocol::TCompactInputProtocol; +use parquet_format_safe::ColumnIndex; + +use crate::parquet::error::Error; +use crate::parquet::indexes::{BooleanIndex, ByteIndex, FixedLenByteIndex, Index, NativeIndex}; +use crate::parquet::schema::types::{PhysicalType, PrimitiveType}; + +pub fn deserialize(data: &[u8], primitive_type: PrimitiveType) -> Result, Error> { + let mut prot = TCompactInputProtocol::new(data, data.len() * 2 + 1024); + + let index = ColumnIndex::read_from_in_protocol(&mut prot)?; + + let index = match primitive_type.physical_type { + PhysicalType::Boolean => Box::new(BooleanIndex::try_new(index)?) 
as Box, + PhysicalType::Int32 => Box::new(NativeIndex::::try_new(index, primitive_type)?), + PhysicalType::Int64 => Box::new(NativeIndex::::try_new(index, primitive_type)?), + PhysicalType::Int96 => Box::new(NativeIndex::<[u32; 3]>::try_new(index, primitive_type)?), + PhysicalType::Float => Box::new(NativeIndex::::try_new(index, primitive_type)?), + PhysicalType::Double => Box::new(NativeIndex::::try_new(index, primitive_type)?), + PhysicalType::ByteArray => Box::new(ByteIndex::try_new(index, primitive_type)?), + PhysicalType::FixedLenByteArray(_) => { + Box::new(FixedLenByteIndex::try_new(index, primitive_type)?) + }, + }; + + Ok(index) +} diff --git a/crates/polars-parquet/src/parquet/read/indexes/mod.rs b/crates/polars-parquet/src/parquet/read/indexes/mod.rs new file mode 100644 index 000000000000..1e1919c84c75 --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/indexes/mod.rs @@ -0,0 +1,4 @@ +mod deserialize; +mod read; + +pub use read::*; diff --git a/crates/polars-parquet/src/parquet/read/indexes/read.rs b/crates/polars-parquet/src/parquet/read/indexes/read.rs new file mode 100644 index 000000000000..379fb4150766 --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/indexes/read.rs @@ -0,0 +1,131 @@ +use std::convert::TryInto; +use std::io::{Cursor, Read, Seek, SeekFrom}; + +use parquet_format_safe::thrift::protocol::TCompactInputProtocol; +use parquet_format_safe::{ColumnChunk, OffsetIndex, PageLocation}; + +use super::deserialize::deserialize; +use crate::parquet::error::Error; +use crate::parquet::indexes::Index; +use crate::parquet::metadata::ColumnChunkMetaData; + +fn prepare_read Option, G: Fn(&ColumnChunk) -> Option>( + chunks: &[ColumnChunkMetaData], + get_offset: F, + get_length: G, +) -> Result<(u64, Vec), Error> { + // c1: [start, length] + // ... + // cN: [start, length] + + let first_chunk = if let Some(chunk) = chunks.first() { + chunk + } else { + return Ok((0, vec![])); + }; + let metadata = first_chunk.column_chunk(); + + let offset: u64 = if let Some(offset) = get_offset(metadata) { + offset.try_into()? + } else { + return Ok((0, vec![])); + }; + + let lengths = chunks + .iter() + .map(|x| get_length(x.column_chunk())) + .map(|maybe_length| { + let index_length = maybe_length.ok_or_else(|| { + Error::oos("The column length must exist if column offset exists") + })?; + + Ok(index_length.try_into()?) + }) + .collect::, Error>>()?; + + Ok((offset, lengths)) +} + +fn prepare_column_index_read(chunks: &[ColumnChunkMetaData]) -> Result<(u64, Vec), Error> { + prepare_read(chunks, |x| x.column_index_offset, |x| x.column_index_length) +} + +fn prepare_offset_index_read(chunks: &[ColumnChunkMetaData]) -> Result<(u64, Vec), Error> { + prepare_read(chunks, |x| x.offset_index_offset, |x| x.offset_index_length) +} + +fn deserialize_column_indexes( + chunks: &[ColumnChunkMetaData], + data: &[u8], + lengths: Vec, +) -> Result>, Error> { + let mut start = 0; + let data = lengths.into_iter().map(|length| { + let r = &data[start..start + length]; + start += length; + r + }); + + chunks + .iter() + .zip(data) + .map(|(chunk, data)| { + let primitive_type = chunk.descriptor().descriptor.primitive_type.clone(); + deserialize(data, primitive_type) + }) + .collect() +} + +/// Reads the column indexes of all [`ColumnChunkMetaData`] and deserializes them into [`Index`]. 
+/// Returns an empty vector if indexes are not available +pub fn read_columns_indexes( + reader: &mut R, + chunks: &[ColumnChunkMetaData], +) -> Result>, Error> { + let (offset, lengths) = prepare_column_index_read(chunks)?; + + let length = lengths.iter().sum::(); + + reader.seek(SeekFrom::Start(offset))?; + + let mut data = vec![]; + data.try_reserve(length)?; + reader.by_ref().take(length as u64).read_to_end(&mut data)?; + + deserialize_column_indexes(chunks, &data, lengths) +} + +fn deserialize_page_locations( + data: &[u8], + column_number: usize, +) -> Result>, Error> { + let len = data.len() * 2 + 1024; + let mut reader = Cursor::new(data); + + (0..column_number) + .map(|_| { + let mut prot = TCompactInputProtocol::new(&mut reader, len); + let offset = OffsetIndex::read_from_in_protocol(&mut prot)?; + Ok(offset.page_locations) + }) + .collect() +} + +/// Read [`PageLocation`]s from the [`ColumnChunkMetaData`]s. +/// Returns an empty vector if indexes are not available +pub fn read_pages_locations( + reader: &mut R, + chunks: &[ColumnChunkMetaData], +) -> Result>, Error> { + let (offset, lengths) = prepare_offset_index_read(chunks)?; + + let length = lengths.iter().sum::(); + + reader.seek(SeekFrom::Start(offset))?; + + let mut data = vec![]; + data.try_reserve(length)?; + reader.by_ref().take(length as u64).read_to_end(&mut data)?; + + deserialize_page_locations(&data, chunks.len()) +} diff --git a/crates/polars-parquet/src/parquet/read/levels.rs b/crates/polars-parquet/src/parquet/read/levels.rs new file mode 100644 index 000000000000..69d12cff9194 --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/levels.rs @@ -0,0 +1,27 @@ +/// Returns the number of bits needed to store the given maximum definition or repetition level. +#[inline] +pub fn get_bit_width(max_level: i16) -> u32 { + 16 - max_level.leading_zeros() +} + +#[cfg(test)] +mod tests { + use super::get_bit_width; + + #[test] + fn test_get_bit_width() { + assert_eq!(0, get_bit_width(0)); + assert_eq!(1, get_bit_width(1)); + assert_eq!(2, get_bit_width(2)); + assert_eq!(2, get_bit_width(3)); + assert_eq!(3, get_bit_width(4)); + assert_eq!(3, get_bit_width(5)); + assert_eq!(3, get_bit_width(6)); + assert_eq!(3, get_bit_width(7)); + assert_eq!(4, get_bit_width(8)); + assert_eq!(4, get_bit_width(15)); + + assert_eq!(8, get_bit_width(255)); + assert_eq!(9, get_bit_width(256)); + } +} diff --git a/crates/polars-parquet/src/parquet/read/metadata.rs b/crates/polars-parquet/src/parquet/read/metadata.rs new file mode 100644 index 000000000000..a75b939a513c --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/metadata.rs @@ -0,0 +1,101 @@ +use std::cmp::min; +use std::convert::TryInto; +use std::io::{Read, Seek, SeekFrom}; + +use parquet_format_safe::thrift::protocol::TCompactInputProtocol; +use parquet_format_safe::FileMetaData as TFileMetaData; + +use super::super::metadata::FileMetaData; +use super::super::{DEFAULT_FOOTER_READ_SIZE, FOOTER_SIZE, HEADER_SIZE, PARQUET_MAGIC}; +use crate::parquet::error::{Error, Result}; + +pub(super) fn metadata_len(buffer: &[u8], len: usize) -> i32 { + i32::from_le_bytes(buffer[len - 8..len - 4].try_into().unwrap()) +} + +// see (unstable) Seek::stream_len +fn stream_len(seek: &mut impl Seek) -> std::result::Result { + let old_pos = seek.stream_position()?; + let len = seek.seek(SeekFrom::End(0))?; + + // Avoid seeking a third time when we were already at the end of the + // stream. The branch is usually way cheaper than a seek operation. 
+ if old_pos != len { + seek.seek(SeekFrom::Start(old_pos))?; + } + + Ok(len) +} + +/// Reads a [`FileMetaData`] from the reader, located at the end of the file. +pub fn read_metadata(reader: &mut R) -> Result { + // check file is large enough to hold footer + let file_size = stream_len(reader)?; + read_metadata_with_size(reader, file_size) +} + +/// Reads a [`FileMetaData`] from the reader, located at the end of the file, with known file size. +pub fn read_metadata_with_size( + reader: &mut R, + file_size: u64, +) -> Result { + if file_size < HEADER_SIZE + FOOTER_SIZE { + return Err(Error::oos( + "A parquet file must contain a header and footer with at least 12 bytes", + )); + } + + // read and cache up to DEFAULT_FOOTER_READ_SIZE bytes from the end and process the footer + let default_end_len = min(DEFAULT_FOOTER_READ_SIZE, file_size) as usize; + reader.seek(SeekFrom::End(-(default_end_len as i64)))?; + + let mut buffer = Vec::with_capacity(default_end_len); + reader + .by_ref() + .take(default_end_len as u64) + .read_to_end(&mut buffer)?; + + // check this is indeed a parquet file + if buffer[default_end_len - 4..] != PARQUET_MAGIC { + return Err(Error::oos("The file must end with PAR1")); + } + + let metadata_len = metadata_len(&buffer, default_end_len); + + let metadata_len: u64 = metadata_len.try_into()?; + + let footer_len = FOOTER_SIZE + metadata_len; + if footer_len > file_size { + return Err(Error::oos( + "The footer size must be smaller or equal to the file's size", + )); + } + + let reader: &[u8] = if (footer_len as usize) < buffer.len() { + // the whole metadata is in the bytes we already read + let remaining = buffer.len() - footer_len as usize; + &buffer[remaining..] + } else { + // the end of file read by default is not long enough, read again including the metadata. 
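+ // layout of the parquet footer, from the end of the file:
+ //   [ thrift-compact FileMetaData ][ 4-byte little-endian metadata length ][ "PAR1" ]
+ // so seeking back `footer_len` bytes (metadata length plus the fixed 8-byte tail)
+ // positions the reader at the start of the metadata.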
+ reader.seek(SeekFrom::End(-(footer_len as i64)))?; + + buffer.clear(); + buffer.try_reserve(footer_len as usize)?; + reader.take(footer_len).read_to_end(&mut buffer)?; + + &buffer + }; + + // a highly nested but sparse struct could result in many allocations + let max_size = reader.len() * 2 + 1024; + + deserialize_metadata(reader, max_size) +} + +/// Parse loaded metadata bytes +pub fn deserialize_metadata(reader: R, max_size: usize) -> Result { + let mut prot = TCompactInputProtocol::new(reader, max_size); + let metadata = TFileMetaData::read_from_in_protocol(&mut prot)?; + + FileMetaData::try_from_thrift(metadata) +} diff --git a/crates/polars-parquet/src/parquet/read/mod.rs b/crates/polars-parquet/src/parquet/read/mod.rs new file mode 100644 index 000000000000..d5790c205cb1 --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/mod.rs @@ -0,0 +1,237 @@ +mod column; +mod compression; +mod indexes; +pub mod levels; +mod metadata; +mod page; +#[cfg(feature = "async")] +mod stream; + +use std::io::{Read, Seek, SeekFrom}; +use std::sync::Arc; + +pub use column::*; +pub use compression::{decompress, BasicDecompressor, Decompressor}; +pub use indexes::{read_columns_indexes, read_pages_locations}; +pub use metadata::{deserialize_metadata, read_metadata, read_metadata_with_size}; +#[cfg(feature = "async")] +pub use page::{get_page_stream, get_page_stream_from_column_start}; +pub use page::{IndexedPageReader, PageFilter, PageIterator, PageMetaData, PageReader}; +#[cfg(feature = "async")] +pub use stream::read_metadata as read_metadata_async; + +use crate::parquet::error::Result; +use crate::parquet::metadata::{ColumnChunkMetaData, FileMetaData, RowGroupMetaData}; + +/// Filters row group metadata to only those row groups, +/// for which the predicate function returns true +pub fn filter_row_groups( + metadata: &FileMetaData, + predicate: &dyn Fn(&RowGroupMetaData, usize) -> bool, +) -> FileMetaData { + let mut filtered_row_groups = Vec::::new(); + for (i, row_group_metadata) in metadata.row_groups.iter().enumerate() { + if predicate(row_group_metadata, i) { + filtered_row_groups.push(row_group_metadata.clone()); + } + } + let mut metadata = metadata.clone(); + metadata.row_groups = filtered_row_groups; + metadata +} + +/// Returns a new [`PageReader`] by seeking `reader` to the beginning of `column_chunk`. +pub fn get_page_iterator( + column_chunk: &ColumnChunkMetaData, + mut reader: R, + pages_filter: Option, + scratch: Vec, + max_page_size: usize, +) -> Result> { + let pages_filter = pages_filter.unwrap_or_else(|| Arc::new(|_, _| true)); + + let (col_start, _) = column_chunk.byte_range(); + reader.seek(SeekFrom::Start(col_start))?; + Ok(PageReader::new( + reader, + column_chunk, + pages_filter, + scratch, + max_page_size, + )) +} + +/// Returns all [`ColumnChunkMetaData`] associated to `field_name`. 
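+/// (matching is done on the first component of each column's `path_in_schema`, so all
+/// leaf columns under a nested field are returned)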
+/// For non-nested types, this returns an iterator with a single column +pub fn get_field_columns<'a>( + columns: &'a [ColumnChunkMetaData], + field_name: &'a str, +) -> impl Iterator { + columns + .iter() + .filter(move |x| x.descriptor().path_in_schema[0] == field_name) +} + +#[cfg(test)] +mod tests { + use std::fs::File; + + use super::*; + use crate::parquet::tests::get_path; + use crate::parquet::FallibleStreamingIterator; + + #[test] + fn basic() -> Result<()> { + let mut testdata = get_path(); + testdata.push("alltypes_plain.parquet"); + let mut file = File::open(testdata).unwrap(); + + let metadata = read_metadata(&mut file)?; + + let row_group = 0; + let column = 0; + let column_metadata = &metadata.row_groups[row_group].columns()[column]; + let buffer = vec![]; + let mut iter = get_page_iterator(column_metadata, &mut file, None, buffer, 1024 * 1024)?; + + let dict = iter.next().unwrap().unwrap(); + assert_eq!(dict.num_values(), 0); + let page = iter.next().unwrap().unwrap(); + assert_eq!(page.num_values(), 8); + Ok(()) + } + + #[test] + fn reuse_buffer() -> Result<()> { + let mut testdata = get_path(); + testdata.push("alltypes_plain.snappy.parquet"); + let mut file = File::open(testdata).unwrap(); + + let metadata = read_metadata(&mut file)?; + + let row_group = 0; + let column = 0; + let column_metadata = &metadata.row_groups[row_group].columns()[column]; + let buffer = vec![0]; + let iterator = get_page_iterator(column_metadata, &mut file, None, buffer, 1024 * 1024)?; + + let buffer = vec![]; + let mut iterator = Decompressor::new(iterator, buffer); + + let _dict = iterator.next()?.unwrap(); + let _page = iterator.next()?.unwrap(); + + assert!(iterator.next()?.is_none()); + let (a, b) = iterator.into_buffers(); + assert_eq!(a.len(), 11); // note: compressed is higher in this example. + assert_eq!(b.len(), 9); + + Ok(()) + } + + #[test] + fn reuse_buffer_decompress() -> Result<()> { + let mut testdata = get_path(); + testdata.push("alltypes_plain.parquet"); + let mut file = File::open(testdata).unwrap(); + + let metadata = read_metadata(&mut file)?; + + let row_group = 0; + let column = 0; + let column_metadata = &metadata.row_groups[row_group].columns()[column]; + let buffer = vec![1]; + let iterator = get_page_iterator(column_metadata, &mut file, None, buffer, 1024 * 1024)?; + + let buffer = vec![]; + let mut iterator = Decompressor::new(iterator, buffer); + + // dict + iterator.next()?.unwrap(); + // page + iterator.next()?.unwrap(); + + assert!(iterator.next()?.is_none()); + let (a, b) = iterator.into_buffers(); + + assert_eq!(a.len(), 11); + assert_eq!(b.len(), 0); // the decompressed buffer is never used because it is always swapped with the other buffer. + + Ok(()) + } + + #[test] + fn column_iter() -> Result<()> { + let mut testdata = get_path(); + testdata.push("alltypes_plain.parquet"); + let mut file = File::open(testdata).unwrap(); + + let metadata = read_metadata(&mut file)?; + + let row_group = 0; + let column = 0; + let column_metadata = &metadata.row_groups[row_group].columns()[column]; + let iter: Vec<_> = + get_page_iterator(column_metadata, &mut file, None, vec![], usize::MAX)?.collect(); + + let field = metadata.schema().fields()[0].clone(); + let mut iter = ReadColumnIterator::new(field, vec![(iter, column_metadata.clone())]); + + loop { + match iter.advance()? { + State::Some(mut new_iter) => { + if let Some((pages, _descriptor)) = new_iter.get() { + let mut iterator = BasicDecompressor::new(pages, vec![]); + while let Some(_page) = iterator.next()? 
{ + // do something with it + } + let _internal_buffer = iterator.into_inner(); + } + iter = new_iter; + }, + State::Finished(_buffer) => { + assert!(_buffer.is_empty()); // data is uncompressed => buffer is always moved + break; + }, + } + } + Ok(()) + } + + #[test] + fn basics_column_iterator() -> Result<()> { + let mut testdata = get_path(); + testdata.push("alltypes_plain.parquet"); + let mut file = File::open(testdata).unwrap(); + + let metadata = read_metadata(&mut file)?; + + let mut iter = ColumnIterator::new( + file, + metadata.row_groups[0].columns().to_vec(), + None, + vec![], + usize::MAX, // we trust the file is correct + ); + + loop { + match iter.advance()? { + State::Some(mut new_iter) => { + if let Some((pages, _descriptor)) = new_iter.get() { + let mut iterator = BasicDecompressor::new(pages, vec![]); + while let Some(_page) = iterator.next()? { + // do something with it + } + let _internal_buffer = iterator.into_inner(); + } + iter = new_iter; + }, + State::Finished(_buffer) => { + assert!(_buffer.is_empty()); // data is uncompressed => buffer is always moved + break; + }, + } + } + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/read/page/indexed_reader.rs b/crates/polars-parquet/src/parquet/read/page/indexed_reader.rs new file mode 100644 index 000000000000..ac11e725070c --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/page/indexed_reader.rs @@ -0,0 +1,204 @@ +use std::collections::VecDeque; +use std::io::{Cursor, Read, Seek, SeekFrom}; + +use super::reader::{finish_page, read_page_header, PageMetaData}; +use crate::parquet::error::Error; +use crate::parquet::indexes::{FilteredPage, Interval}; +use crate::parquet::metadata::{ColumnChunkMetaData, Descriptor}; +use crate::parquet::page::{CompressedDictPage, CompressedPage, ParquetPageHeader}; +use crate::parquet::parquet_bridge::Compression; + +#[derive(Debug, Clone, Copy)] +enum State { + MaybeDict, + Data, +} + +/// A fallible [`Iterator`] of [`CompressedPage`]. This iterator leverages page indexes +/// to skip pages that are not needed. 
Consequently, the pages from this +/// iterator always have [`Some`] [`crate::parquet::page::CompressedDataPage::selected_rows()`] +pub struct IndexedPageReader { + // The source + reader: R, + + column_start: u64, + compression: Compression, + + // used to deserialize dictionary pages and attach the descriptor to every read page + descriptor: Descriptor, + + // buffer to read the whole page [header][data] into memory + buffer: Vec, + + // buffer to store the data [data] and re-use across pages + data_buffer: Vec, + + pages: VecDeque, + + state: State, +} + +fn read_page( + reader: &mut R, + start: u64, + length: usize, + buffer: &mut Vec, + data: &mut Vec, +) -> Result { + // seek to the page + reader.seek(SeekFrom::Start(start))?; + + // read [header][data] to buffer + buffer.clear(); + buffer.try_reserve(length)?; + reader.by_ref().take(length as u64).read_to_end(buffer)?; + + // deserialize [header] + let mut reader = Cursor::new(buffer); + let page_header = read_page_header(&mut reader, 1024 * 1024)?; + let header_size = reader.stream_position().unwrap() as usize; + let buffer = reader.into_inner(); + + // copy [data] + data.clear(); + data.extend_from_slice(&buffer[header_size..]); + Ok(page_header) +} + +fn read_dict_page( + reader: &mut R, + start: u64, + length: usize, + buffer: &mut Vec, + data: &mut Vec, + compression: Compression, + descriptor: &Descriptor, +) -> Result { + let page_header = read_page(reader, start, length, buffer, data)?; + + let page = finish_page(page_header, data, compression, descriptor, None)?; + if let CompressedPage::Dict(page) = page { + Ok(page) + } else { + Err(Error::oos( + "The first page is not a dictionary page but it should", + )) + } +} + +impl IndexedPageReader { + /// Returns a new [`IndexedPageReader`]. + pub fn new( + reader: R, + column: &ColumnChunkMetaData, + pages: Vec, + buffer: Vec, + data_buffer: Vec, + ) -> Self { + Self::new_with_page_meta(reader, column.into(), pages, buffer, data_buffer) + } + + /// Returns a new [`IndexedPageReader`] with [`PageMetaData`]. 
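+ /// # Note
+ /// The `pages` argument is typically derived from the column's offset index (see
+ /// `read_pages_locations`) intersected with the row intervals that should be read,
+ /// so only the selected pages are visited.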
+ pub fn new_with_page_meta( + reader: R, + column: PageMetaData, + pages: Vec, + buffer: Vec, + data_buffer: Vec, + ) -> Self { + let pages = pages.into_iter().collect(); + Self { + reader, + column_start: column.column_start, + compression: column.compression, + descriptor: column.descriptor, + buffer, + data_buffer, + pages, + state: State::MaybeDict, + } + } + + /// consumes self into the reader and the two internal buffers + pub fn into_inner(self) -> (R, Vec, Vec) { + (self.reader, self.buffer, self.data_buffer) + } + + fn read_page( + &mut self, + start: u64, + length: usize, + selected_rows: Vec, + ) -> Result { + // it will be read - take buffer + let mut data = std::mem::take(&mut self.data_buffer); + + let page_header = read_page(&mut self.reader, start, length, &mut self.buffer, &mut data)?; + + finish_page( + page_header, + &mut data, + self.compression, + &self.descriptor, + Some(selected_rows), + ) + } + + fn read_dict(&mut self) -> Option> { + // a dictionary page exists iff the first data page is not at the start of + // the column + let (start, length) = match self.pages.get(0) { + Some(page) => { + let length = (page.start - self.column_start) as usize; + if length > 0 { + (self.column_start, length) + } else { + return None; + } + }, + None => return None, + }; + + // it will be read - take buffer + let mut data = std::mem::take(&mut self.data_buffer); + + let maybe_page = read_dict_page( + &mut self.reader, + start, + length, + &mut self.buffer, + &mut data, + self.compression, + &self.descriptor, + ); + Some(maybe_page.map(CompressedPage::Dict)) + } +} + +impl Iterator for IndexedPageReader { + type Item = Result; + + fn next(&mut self) -> Option { + match self.state { + State::MaybeDict => { + self.state = State::Data; + if let Some(dict) = self.read_dict() { + Some(dict) + } else { + self.next() + } + }, + State::Data => { + if let Some(page) = self.pages.pop_front() { + if page.selected_rows.is_empty() { + self.next() + } else { + Some(self.read_page(page.start, page.length, page.selected_rows)) + } + } else { + None + } + }, + } + } +} diff --git a/crates/polars-parquet/src/parquet/read/page/mod.rs b/crates/polars-parquet/src/parquet/read/page/mod.rs new file mode 100644 index 000000000000..d3437ef5d14d --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/page/mod.rs @@ -0,0 +1,18 @@ +mod indexed_reader; +mod reader; +#[cfg(feature = "async")] +mod stream; + +pub use indexed_reader::IndexedPageReader; +pub use reader::{PageFilter, PageMetaData, PageReader}; + +use crate::parquet::error::Error; +use crate::parquet::page::CompressedPage; + +pub trait PageIterator: Iterator> { + fn swap_buffer(&mut self, buffer: &mut Vec); +} + +#[cfg(feature = "async")] +#[cfg_attr(docsrs, doc(cfg(feature = "async")))] +pub use stream::{get_page_stream, get_page_stream_from_column_start}; diff --git a/crates/polars-parquet/src/parquet/read/page/reader.rs b/crates/polars-parquet/src/parquet/read/page/reader.rs new file mode 100644 index 000000000000..e0078f97c6d4 --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/page/reader.rs @@ -0,0 +1,306 @@ +use std::convert::TryInto; +use std::io::Read; +use std::sync::Arc; + +use parquet_format_safe::thrift::protocol::TCompactInputProtocol; + +use super::PageIterator; +use crate::parquet::compression::Compression; +use crate::parquet::error::{Error, Result}; +use crate::parquet::indexes::Interval; +use crate::parquet::metadata::{ColumnChunkMetaData, Descriptor}; +use crate::parquet::page::{ + CompressedDataPage, 
CompressedDictPage, CompressedPage, DataPageHeader, PageType, + ParquetPageHeader, +}; +use crate::parquet::parquet_bridge::Encoding; + +/// This meta is a small part of [`ColumnChunkMetaData`]. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PageMetaData { + /// The start offset of this column chunk in file. + pub column_start: u64, + /// The number of values in this column chunk. + pub num_values: i64, + /// Compression type + pub compression: Compression, + /// The descriptor of this parquet column + pub descriptor: Descriptor, +} + +impl PageMetaData { + /// Returns a new [`PageMetaData`]. + pub fn new( + column_start: u64, + num_values: i64, + compression: Compression, + descriptor: Descriptor, + ) -> Self { + Self { + column_start, + num_values, + compression, + descriptor, + } + } +} + +impl From<&ColumnChunkMetaData> for PageMetaData { + fn from(column: &ColumnChunkMetaData) -> Self { + Self { + column_start: column.byte_range().0, + num_values: column.num_values(), + compression: column.compression(), + descriptor: column.descriptor().descriptor.clone(), + } + } +} + +/// Type declaration for a page filter +pub type PageFilter = Arc bool + Send + Sync>; + +/// A fallible [`Iterator`] of [`CompressedDataPage`]. This iterator reads pages back +/// to back until all pages have been consumed. +/// The pages from this iterator always have [`None`] [`crate::parquet::page::CompressedDataPage::selected_rows()`] since +/// filter pushdown is not supported without a +/// pre-computed [page index](https://github.com/apache/parquet-format/blob/master/PageIndex.md). +pub struct PageReader { + // The source + reader: R, + + compression: Compression, + + // The number of values we have seen so far. + seen_num_values: i64, + + // The number of total values in this column chunk. + total_num_values: i64, + + pages_filter: PageFilter, + + descriptor: Descriptor, + + // The currently allocated buffer. + pub(crate) scratch: Vec, + + // Maximum page size (compressed or uncompressed) to limit allocations + max_page_size: usize, +} + +impl PageReader { + /// Returns a new [`PageReader`]. + /// + /// It assumes that the reader has been `sought` (`seek`) to the beginning of `column`. + /// The parameter `max_header_size` + pub fn new( + reader: R, + column: &ColumnChunkMetaData, + pages_filter: PageFilter, + scratch: Vec, + max_page_size: usize, + ) -> Self { + Self::new_with_page_meta(reader, column.into(), pages_filter, scratch, max_page_size) + } + + /// Create a a new [`PageReader`] with [`PageMetaData`]. + /// + /// It assumes that the reader has been `sought` (`seek`) to the beginning of `column`. 
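+    /// # Example
+    /// A sketch, assuming `reader` was already positioned at `meta.column_start` and
+    /// `meta` is a [`PageMetaData`] built elsewhere; the filter below keeps every page:
+    /// ```ignore
+    /// let pages = PageReader::new_with_page_meta(reader, meta, Arc::new(|_, _| true), vec![], 1024 * 1024);
+    /// for page in pages {
+    ///     let _compressed_page = page?;
+    /// }
+    /// ```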
+ pub fn new_with_page_meta( + reader: R, + reader_meta: PageMetaData, + pages_filter: PageFilter, + scratch: Vec, + max_page_size: usize, + ) -> Self { + Self { + reader, + total_num_values: reader_meta.num_values, + compression: reader_meta.compression, + seen_num_values: 0, + descriptor: reader_meta.descriptor, + pages_filter, + scratch, + max_page_size, + } + } + + /// Returns the reader and this Readers' interval buffer + pub fn into_inner(self) -> (R, Vec) { + (self.reader, self.scratch) + } +} + +impl PageIterator for PageReader { + fn swap_buffer(&mut self, scratch: &mut Vec) { + std::mem::swap(&mut self.scratch, scratch) + } +} + +impl Iterator for PageReader { + type Item = Result; + + fn next(&mut self) -> Option { + let mut buffer = std::mem::take(&mut self.scratch); + let maybe_maybe_page = next_page(self, &mut buffer).transpose(); + if let Some(ref maybe_page) = maybe_maybe_page { + if let Ok(CompressedPage::Data(page)) = maybe_page { + // check if we should filter it (only valid for data pages) + let to_consume = (self.pages_filter)(&self.descriptor, page.header()); + if !to_consume { + self.scratch = std::mem::take(&mut buffer); + return self.next(); + } + } + } else { + // no page => we take back the buffer + self.scratch = std::mem::take(&mut buffer); + } + maybe_maybe_page + } +} + +/// Reads Page header from Thrift. +pub(super) fn read_page_header( + reader: &mut R, + max_size: usize, +) -> Result { + let mut prot = TCompactInputProtocol::new(reader, max_size); + let page_header = ParquetPageHeader::read_from_in_protocol(&mut prot)?; + Ok(page_header) +} + +/// This function is lightweight and executes a minimal amount of work so that it is IO bounded. +// Any un-necessary CPU-intensive tasks SHOULD be executed on individual pages. +fn next_page( + reader: &mut PageReader, + buffer: &mut Vec, +) -> Result> { + if reader.seen_num_values >= reader.total_num_values { + return Ok(None); + }; + build_page(reader, buffer) +} + +pub(super) fn build_page( + reader: &mut PageReader, + buffer: &mut Vec, +) -> Result> { + let page_header = read_page_header(&mut reader.reader, reader.max_page_size)?; + + reader.seen_num_values += get_page_header(&page_header)? 
+ .map(|x| x.num_values() as i64) + .unwrap_or_default(); + + let read_size: usize = page_header.compressed_page_size.try_into()?; + + if read_size > reader.max_page_size { + return Err(Error::WouldOverAllocate); + } + + buffer.clear(); + buffer.try_reserve(read_size)?; + let bytes_read = reader + .reader + .by_ref() + .take(read_size as u64) + .read_to_end(buffer)?; + + if bytes_read != read_size { + return Err(Error::oos( + "The page header reported the wrong page size".to_string(), + )); + } + + finish_page( + page_header, + buffer, + reader.compression, + &reader.descriptor, + None, + ) + .map(Some) +} + +pub(super) fn finish_page( + page_header: ParquetPageHeader, + data: &mut Vec, + compression: Compression, + descriptor: &Descriptor, + selected_rows: Option>, +) -> Result { + let type_ = page_header.type_.try_into()?; + let uncompressed_page_size = page_header.uncompressed_page_size.try_into()?; + match type_ { + PageType::DictionaryPage => { + let dict_header = page_header.dictionary_page_header.as_ref().ok_or_else(|| { + Error::oos( + "The page header type is a dictionary page but the dictionary header is empty", + ) + })?; + let is_sorted = dict_header.is_sorted.unwrap_or(false); + + // move the buffer to `dict_page` + let page = CompressedDictPage::new( + std::mem::take(data), + compression, + uncompressed_page_size, + dict_header.num_values.try_into()?, + is_sorted, + ); + + Ok(CompressedPage::Dict(page)) + }, + PageType::DataPage => { + let header = page_header.data_page_header.ok_or_else(|| { + Error::oos("The page header type is a v1 data page but the v1 data header is empty") + })?; + + Ok(CompressedPage::Data(CompressedDataPage::new_read( + DataPageHeader::V1(header), + std::mem::take(data), + compression, + uncompressed_page_size, + descriptor.clone(), + selected_rows, + ))) + }, + PageType::DataPageV2 => { + let header = page_header.data_page_header_v2.ok_or_else(|| { + Error::oos("The page header type is a v2 data page but the v2 data header is empty") + })?; + + Ok(CompressedPage::Data(CompressedDataPage::new_read( + DataPageHeader::V2(header), + std::mem::take(data), + compression, + uncompressed_page_size, + descriptor.clone(), + selected_rows, + ))) + }, + } +} + +pub(super) fn get_page_header(header: &ParquetPageHeader) -> Result> { + let type_ = header.type_.try_into()?; + Ok(match type_ { + PageType::DataPage => { + let header = header.data_page_header.clone().ok_or_else(|| { + Error::oos("The page header type is a v1 data page but the v1 header is empty") + })?; + let _: Encoding = header.encoding.try_into()?; + let _: Encoding = header.repetition_level_encoding.try_into()?; + let _: Encoding = header.definition_level_encoding.try_into()?; + + Some(DataPageHeader::V1(header)) + }, + PageType::DataPageV2 => { + let header = header.data_page_header_v2.clone().ok_or_else(|| { + Error::oos("The page header type is a v1 data page but the v1 header is empty") + })?; + let _: Encoding = header.encoding.try_into()?; + Some(DataPageHeader::V2(header)) + }, + _ => None, + }) +} diff --git a/crates/polars-parquet/src/parquet/read/page/stream.rs b/crates/polars-parquet/src/parquet/read/page/stream.rs new file mode 100644 index 000000000000..657e56a82c4a --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/page/stream.rs @@ -0,0 +1,138 @@ +use std::io::SeekFrom; + +use async_stream::try_stream; +use futures::io::{copy, sink}; +use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, Stream}; +use 
parquet_format_safe::thrift::protocol::TCompactInputStreamProtocol; + +use super::reader::{finish_page, get_page_header, PageMetaData}; +use super::PageFilter; +use crate::parquet::compression::Compression; +use crate::parquet::error::{Error, Result}; +use crate::parquet::metadata::{ColumnChunkMetaData, Descriptor}; +use crate::parquet::page::{CompressedPage, ParquetPageHeader}; + +/// Returns a stream of compressed data pages +pub async fn get_page_stream<'a, RR: AsyncRead + Unpin + Send + AsyncSeek>( + column_metadata: &'a ColumnChunkMetaData, + reader: &'a mut RR, + scratch: Vec, + pages_filter: PageFilter, + max_page_size: usize, +) -> Result> + 'a> { + get_page_stream_with_page_meta( + column_metadata.into(), + reader, + scratch, + pages_filter, + max_page_size, + ) + .await +} + +/// Returns a stream of compressed data pages from a reader that begins at the start of the column +pub async fn get_page_stream_from_column_start<'a, R: AsyncRead + Unpin + Send>( + column_metadata: &'a ColumnChunkMetaData, + reader: &'a mut R, + scratch: Vec, + pages_filter: PageFilter, + max_header_size: usize, +) -> Result> + 'a> { + let page_metadata: PageMetaData = column_metadata.into(); + Ok(_get_page_stream( + reader, + page_metadata.num_values, + page_metadata.compression, + page_metadata.descriptor, + scratch, + pages_filter, + max_header_size, + )) +} + +/// Returns a stream of compressed data pages with [`PageMetaData`] +pub async fn get_page_stream_with_page_meta( + page_metadata: PageMetaData, + reader: &mut RR, + scratch: Vec, + pages_filter: PageFilter, + max_page_size: usize, +) -> Result> + '_> { + let column_start = page_metadata.column_start; + reader.seek(SeekFrom::Start(column_start)).await?; + Ok(_get_page_stream( + reader, + page_metadata.num_values, + page_metadata.compression, + page_metadata.descriptor, + scratch, + pages_filter, + max_page_size, + )) +} + +fn _get_page_stream( + reader: &mut R, + total_num_values: i64, + compression: Compression, + descriptor: Descriptor, + mut scratch: Vec, + pages_filter: PageFilter, + max_page_size: usize, +) -> impl Stream> + '_ { + let mut seen_values = 0i64; + try_stream! { + while seen_values < total_num_values { + // the header + let page_header = read_page_header(reader, max_page_size).await?; + + let data_header = get_page_header(&page_header)?; + seen_values += data_header.as_ref().map(|x| x.num_values() as i64).unwrap_or_default(); + + let read_size: usize = page_header.compressed_page_size.try_into()?; + + if let Some(data_header) = data_header { + if !pages_filter(&descriptor, &data_header) { + // page to be skipped, we sill need to seek + copy(reader.take(read_size as u64), &mut sink()).await?; + continue + } + } + + if read_size > max_page_size { + Err(Error::WouldOverAllocate)? + } + + // followed by the buffer + scratch.clear(); + scratch.try_reserve(read_size)?; + let bytes_read = reader + .take(read_size as u64) + .read_to_end(&mut scratch).await?; + + if bytes_read != read_size { + Err(Error::oos( + "The page header reported the wrong page size".to_string(), + ))? + } + + yield finish_page( + page_header, + &mut scratch, + compression, + &descriptor, + None, + )?; + } + } +} + +/// Reads Page header from Thrift. 
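+/// (async counterpart of `reader::read_page_header`; `max_page_size` bounds the
+/// allocation made while decoding the thrift header)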
+async fn read_page_header( + reader: &mut R, + max_page_size: usize, +) -> Result { + let mut prot = TCompactInputStreamProtocol::new(reader, max_page_size); + let page_header = ParquetPageHeader::stream_from_in_protocol(&mut prot).await?; + Ok(page_header) +} diff --git a/crates/polars-parquet/src/parquet/read/stream.rs b/crates/polars-parquet/src/parquet/read/stream.rs new file mode 100644 index 000000000000..e6e47e159937 --- /dev/null +++ b/crates/polars-parquet/src/parquet/read/stream.rs @@ -0,0 +1,88 @@ +use std::io::SeekFrom; + +use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt}; + +use super::super::metadata::FileMetaData; +use super::super::{DEFAULT_FOOTER_READ_SIZE, FOOTER_SIZE, PARQUET_MAGIC}; +use super::metadata::{deserialize_metadata, metadata_len}; +use crate::parquet::error::{Error, Result}; +use crate::parquet::HEADER_SIZE; + +async fn stream_len( + seek: &mut (impl AsyncSeek + std::marker::Unpin), +) -> std::result::Result { + let old_pos = seek.seek(SeekFrom::Current(0)).await?; + let len = seek.seek(SeekFrom::End(0)).await?; + + // Avoid seeking a third time when we were already at the end of the + // stream. The branch is usually way cheaper than a seek operation. + if old_pos != len { + seek.seek(SeekFrom::Start(old_pos)).await?; + } + + Ok(len) +} + +/// Asynchronously reads the files' metadata +pub async fn read_metadata( + reader: &mut R, +) -> Result { + let file_size = stream_len(reader).await?; + + if file_size < HEADER_SIZE + FOOTER_SIZE { + return Err(Error::oos( + "A parquet file must contain a header and footer with at least 12 bytes", + )); + } + + // read and cache up to DEFAULT_FOOTER_READ_SIZE bytes from the end and process the footer + let default_end_len = std::cmp::min(DEFAULT_FOOTER_READ_SIZE, file_size) as usize; + reader + .seek(SeekFrom::End(-(default_end_len as i64))) + .await?; + + let mut buffer = vec![]; + buffer.try_reserve(default_end_len)?; + reader + .take(default_end_len as u64) + .read_to_end(&mut buffer) + .await?; + + // check this is indeed a parquet file + if buffer[default_end_len - 4..] != PARQUET_MAGIC { + return Err(Error::oos("Invalid Parquet file. Corrupt footer")); + } + + let metadata_len = metadata_len(&buffer, default_end_len); + let metadata_len: u64 = metadata_len.try_into()?; + + let footer_len = FOOTER_SIZE + metadata_len; + if footer_len > file_size { + return Err(Error::oos( + "The footer size must be smaller or equal to the file's size", + )); + } + + let reader = if (footer_len as usize) < buffer.len() { + // the whole metadata is in the bytes we already read + let remaining = buffer.len() - footer_len as usize; + &buffer[remaining..] + } else { + // the end of file read by default is not long enough, read again including the metadata. 
+ reader.seek(SeekFrom::End(-(footer_len as i64))).await?; + + buffer.clear(); + buffer.try_reserve(footer_len as usize)?; + reader + .take(footer_len as u64) + .read_to_end(&mut buffer) + .await?; + + &buffer + }; + + // a highly nested but sparse struct could result in many allocations + let max_size = reader.len() * 2 + 1024; + + deserialize_metadata(reader, max_size) +} diff --git a/crates/polars-parquet/src/parquet/schema/io_message/from_message.rs b/crates/polars-parquet/src/parquet/schema/io_message/from_message.rs new file mode 100644 index 000000000000..f04800516d55 --- /dev/null +++ b/crates/polars-parquet/src/parquet/schema/io_message/from_message.rs @@ -0,0 +1,1159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Parquet schema parser. +//! Provides methods to parse and validate string message type into Parquet +//! [`ParquetType`](crate::parquet::schema::types::ParquetType). +//! +//! # Example +//! +//! ```rust +//! use crate::parquet::parquet::schema::io_message::from_message; +//! +//! let message_type = " +//! message spark_schema { +//! OPTIONAL BYTE_ARRAY a (UTF8); +//! REQUIRED INT32 b; +//! REQUIRED DOUBLE c; +//! REQUIRED BOOLEAN d; +//! OPTIONAL group e (LIST) { +//! REPEATED group list { +//! REQUIRED INT32 element; +//! } +//! } +//! } +//! "; +//! +//! let schema = from_message(message_type).expect("Expected valid schema"); +//! println!("{:?}", schema); +//! 
``` + +use parquet_format_safe::Type; +use types::PrimitiveLogicalType; + +use super::super::types::{ParquetType, TimeUnit}; +use super::super::*; +use crate::parquet::error::{Error, Result}; +use crate::parquet::schema::types::{GroupConvertedType, PrimitiveConvertedType}; + +fn is_logical_type(s: &str) -> bool { + matches!( + s, + "INTEGER" + | "MAP" + | "LIST" + | "ENUM" + | "DECIMAL" + | "DATE" + | "TIME" + | "TIMESTAMP" + | "STRING" + | "JSON" + | "BSON" + | "UUID" + | "UNKNOWN" + | "INTERVAL" + ) +} + +fn is_converted_type(s: &str) -> bool { + matches!( + s, + "UTF8" + | "ENUM" + | "DECIMAL" + | "DATE" + | "TIME_MILLIS" + | "TIME_MICROS" + | "TIMESTAMP_MILLIS" + | "TIMESTAMP_MICROS" + | "UINT_8" + | "UINT_16" + | "UINT_32" + | "UINT_64" + | "INT_8" + | "INT_16" + | "INT_32" + | "INT_64" + | "JSON" + | "BSON" + | "INTERVAL" + ) +} + +fn converted_group_from_str(s: &str) -> Result { + Ok(match s { + "MAP" => GroupConvertedType::Map, + "MAP_KEY_VALUE" => GroupConvertedType::MapKeyValue, + "LIST" => GroupConvertedType::List, + other => return Err(Error::oos(format!("Invalid converted type {}", other))), + }) +} + +fn converted_primitive_from_str(s: &str) -> Option { + use PrimitiveConvertedType::*; + Some(match s { + "UTF8" => Utf8, + "ENUM" => Enum, + "DECIMAL" => Decimal(0, 0), + "DATE" => Date, + "TIME_MILLIS" => TimeMillis, + "TIME_MICROS" => TimeMicros, + "TIMESTAMP_MILLIS" => TimestampMillis, + "TIMESTAMP_MICROS" => TimestampMicros, + "UINT_8" => Uint8, + "UINT_16" => Uint16, + "UINT_32" => Uint32, + "UINT_64" => Uint64, + "INT_8" => Int8, + "INT_16" => Int16, + "INT_32" => Int32, + "INT_64" => Int64, + "JSON" => Json, + "BSON" => Bson, + "INTERVAL" => Interval, + _ => return None, + }) +} + +fn repetition_from_str(s: &str) -> Result { + Ok(match s { + "REQUIRED" => Repetition::Required, + "OPTIONAL" => Repetition::Optional, + "REPEATED" => Repetition::Repeated, + other => return Err(Error::oos(format!("Invalid repetition {}", other))), + }) +} + +fn type_from_str(s: &str) -> Result { + match s { + "BOOLEAN" => Ok(Type::BOOLEAN), + "INT32" => Ok(Type::INT32), + "INT64" => Ok(Type::INT64), + "INT96" => Ok(Type::INT96), + "FLOAT" => Ok(Type::FLOAT), + "DOUBLE" => Ok(Type::DOUBLE), + "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY), + "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY), + other => Err(Error::oos(format!("Invalid type {}", other))), + } +} + +/// Parses message type as string into a Parquet [`ParquetType`](crate::parquet::schema::types::ParquetType) +/// which, for example, could be used to extract individual columns. Returns Parquet +/// general error when parsing or validation fails. +pub fn from_message(message_type: &str) -> Result { + let mut parser = Parser { + tokenizer: &mut Tokenizer::from_str(message_type), + }; + parser.parse_message_type() +} + +/// Tokenizer to split message type string into tokens that are separated using characters +/// defined in `is_schema_delim` method. Tokenizer also preserves delimiters as tokens. +/// Tokenizer provides Iterator interface to process tokens; it also allows to step back +/// to reprocess previous tokens. 
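+/// # Example
+/// A sketch of how a schema fragment is tokenized; delimiters become their own tokens:
+/// ```ignore
+/// let tokens: Vec<&str> = Tokenizer::from_str("OPTIONAL BYTE_ARRAY a (UTF8);").collect();
+/// assert_eq!(tokens, ["OPTIONAL", "BYTE_ARRAY", "a", "(", "UTF8", ")", ";"]);
+/// ```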
+struct Tokenizer<'a> { + // List of all tokens for a string + tokens: Vec<&'a str>, + // Current index of vector + index: usize, +} + +impl<'a> Tokenizer<'a> { + // Create tokenizer from message type string + pub fn from_str(string: &'a str) -> Self { + let vec = string + .split_whitespace() + .flat_map(Self::split_token) + .collect(); + Tokenizer { + tokens: vec, + index: 0, + } + } + + // List of all special characters in schema + fn is_schema_delim(c: char) -> bool { + c == ';' || c == '{' || c == '}' || c == '(' || c == ')' || c == '=' || c == ',' + } + + /// Splits string into tokens; input string can already be token or can contain + /// delimiters, e.g. required" -> Vec("required") and + /// "(UTF8);" -> Vec("(", "UTF8", ")", ";") + fn split_token(string: &str) -> Vec<&str> { + let mut buffer: Vec<&str> = Vec::new(); + let mut tail = string; + while let Some(index) = tail.find(Self::is_schema_delim) { + let (h, t) = tail.split_at(index); + if !h.is_empty() { + buffer.push(h); + } + buffer.push(&t[0..1]); + tail = &t[1..]; + } + if !tail.is_empty() { + buffer.push(tail); + } + buffer + } + + // Move pointer to a previous element + fn backtrack(&mut self) { + self.index -= 1; + } +} + +impl<'a> Iterator for Tokenizer<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if self.index < self.tokens.len() { + self.index += 1; + Some(self.tokens[self.index - 1]) + } else { + None + } + } +} + +/// Internal Schema parser. +/// Traverses message type using tokenizer and parses each group/primitive type +/// recursively. +struct Parser<'a> { + tokenizer: &'a mut Tokenizer<'a>, +} + +// Utility function to assert token on validity. +fn assert_token(token: Option<&str>, expected: &str) -> Result<()> { + match token { + Some(value) if value == expected => Ok(()), + Some(other) => Err(Error::oos(format!( + "Expected '{}', found token '{}'", + expected, other + ))), + None => Err(Error::oos(format!( + "Expected '{}', but no token found (None)", + expected + ))), + } +} + +// Utility function to parse i32 or return general error. +fn parse_i32(value: Option<&str>, not_found_msg: &str, parse_fail_msg: &str) -> Result { + value + .ok_or_else(|| Error::oos(not_found_msg)) + .and_then(|v| v.parse::().map_err(|_| Error::oos(parse_fail_msg))) +} + +// Utility function to parse boolean or return general error. +#[inline] +fn parse_bool(value: Option<&str>, not_found_msg: &str, parse_fail_msg: &str) -> Result { + value + .ok_or_else(|| Error::oos(not_found_msg)) + .and_then(|v| { + v.to_lowercase() + .parse::() + .map_err(|_| Error::oos(parse_fail_msg)) + }) +} + +// Utility function to parse TimeUnit or return general error. +fn parse_timeunit( + value: Option<&str>, + not_found_msg: &str, + parse_fail_msg: &str, +) -> Result { + value + .ok_or_else(|| Error::oos(not_found_msg)) + .and_then(|v| match v.to_uppercase().as_str() { + "MILLIS" => Ok(TimeUnit::Milliseconds), + "MICROS" => Ok(TimeUnit::Microseconds), + "NANOS" => Ok(TimeUnit::Nanoseconds), + _ => Err(Error::oos(parse_fail_msg)), + }) +} + +impl<'a> Parser<'a> { + // Entry function to parse message type, uses internal tokenizer. + fn parse_message_type(&mut self) -> Result { + // Check that message type starts with "message". 
+ match self.tokenizer.next() { + Some("message") => { + let name = self + .tokenizer + .next() + .ok_or_else(|| Error::oos("Expected name, found None"))?; + let fields = self.parse_child_types()?; + Ok(ParquetType::new_root(name.to_string(), fields)) + }, + _ => Err(Error::oos("Message type does not start with 'message'")), + } + } + + // Parses child types for a current group type. + // This is only invoked on root and group types. + fn parse_child_types(&mut self) -> Result> { + assert_token(self.tokenizer.next(), "{")?; + let mut vec = Vec::new(); + while let Some(value) = self.tokenizer.next() { + if value == "}" { + break; + } else { + self.tokenizer.backtrack(); + vec.push(self.add_type()?); + } + } + Ok(vec) + } + + fn add_type(&mut self) -> Result { + // Parse repetition + let repetition = self + .tokenizer + .next() + .ok_or_else(|| Error::oos("Expected repetition, found None")) + .and_then(|v| repetition_from_str(&v.to_uppercase()))?; + + match self.tokenizer.next() { + Some(group) if group.to_uppercase() == "GROUP" => self.add_group_type(repetition), + Some(type_string) => { + let physical_type = type_from_str(&type_string.to_uppercase())?; + self.add_primitive_type(repetition, physical_type) + }, + None => Err(Error::oos("Invalid type, could not extract next token")), + } + } + + fn add_group_type(&mut self, repetition: Repetition) -> Result { + // Parse name of the group type + let name = self + .tokenizer + .next() + .ok_or_else(|| Error::oos("Expected name, found None"))?; + + // Parse converted type if exists + let converted_type = if let Some("(") = self.tokenizer.next() { + let converted_type = self + .tokenizer + .next() + .ok_or_else(|| Error::oos("Expected converted type, found None")) + .and_then(|v| converted_group_from_str(&v.to_uppercase()))?; + assert_token(self.tokenizer.next(), ")")?; + Some(converted_type) + } else { + self.tokenizer.backtrack(); + None + }; + + // Parse optional id + let id = if let Some("=") = self.tokenizer.next() { + self.tokenizer.next().and_then(|v| v.parse::().ok()) + } else { + self.tokenizer.backtrack(); + None + }; + + let fields = self.parse_child_types()?; + + Ok(ParquetType::from_converted( + name.to_string(), + fields, + repetition, + converted_type, + id, + )) + } + + fn add_primitive_type( + &mut self, + repetition: Repetition, + physical_type: Type, + ) -> Result { + // Read type length if the type is FIXED_LEN_BYTE_ARRAY. 
+ let length = if physical_type == Type::FIXED_LEN_BYTE_ARRAY { + assert_token(self.tokenizer.next(), "(")?; + let length = parse_i32( + self.tokenizer.next(), + "Expected length for FIXED_LEN_BYTE_ARRAY, found None", + "Failed to parse length for FIXED_LEN_BYTE_ARRAY", + )?; + assert_token(self.tokenizer.next(), ")")?; + Some(length) + } else { + None + }; + + // Parse name of the primitive type + let name = self + .tokenizer + .next() + .ok_or_else(|| Error::oos("Expected name, found None"))?; + + // Parse logical types + let (converted_type, logical_type) = if let Some("(") = self.tokenizer.next() { + let (is_logical_type, converted_type, token) = self + .tokenizer + .next() + .ok_or_else(|| Error::oos("Expected converted or logical type, found None")) + .and_then(|v| { + let string = v.to_uppercase(); + Ok(if is_logical_type(&string) { + (true, None, string) + } else if is_converted_type(&string) { + (false, converted_primitive_from_str(&string), string) + } else { + return Err(Error::oos(format!( + "Expected converted or logical type, found {}", + string + ))); + }) + })?; + + let logical_type = if is_logical_type { + Some(self.parse_logical_type(&token)?) + } else { + None + }; + + // converted type decimal + let converted_type = match converted_type { + Some(PrimitiveConvertedType::Decimal(_, _)) => { + Some(self.parse_converted_decimal()?) + }, + other => other, + }; + + assert_token(self.tokenizer.next(), ")")?; + (converted_type, logical_type) + } else { + self.tokenizer.backtrack(); + (None, None) + }; + + // Parse optional id + let id = if let Some("=") = self.tokenizer.next() { + self.tokenizer.next().and_then(|v| v.parse::().ok()) + } else { + self.tokenizer.backtrack(); + None + }; + assert_token(self.tokenizer.next(), ";")?; + + ParquetType::try_from_primitive( + name.to_string(), + (physical_type, length).try_into()?, + repetition, + converted_type, + logical_type, + id, + ) + } + + fn parse_converted_decimal(&mut self) -> Result { + assert_token(self.tokenizer.next(), "(")?; + // Parse precision + let precision = parse_i32( + self.tokenizer.next(), + "Expected precision, found None", + "Failed to parse precision for DECIMAL type", + )?; + + // Parse scale + let scale = if let Some(",") = self.tokenizer.next() { + parse_i32( + self.tokenizer.next(), + "Expected scale, found None", + "Failed to parse scale for DECIMAL type", + )? + } else { + // Scale is not provided, set it to 0. + self.tokenizer.backtrack(); + 0 + }; + + assert_token(self.tokenizer.next(), ")")?; + Ok(PrimitiveConvertedType::Decimal( + precision.try_into()?, + scale.try_into()?, + )) + } + + fn parse_logical_type(&mut self, tpe: &str) -> Result { + Ok(match tpe { + "ENUM" => PrimitiveLogicalType::Enum, + "DATE" => PrimitiveLogicalType::Date, + "DECIMAL" => { + let (precision, scale) = if let Some("(") = self.tokenizer.next() { + let precision = parse_i32( + self.tokenizer.next(), + "Expected precision, found None", + "Failed to parse precision for DECIMAL type", + )?; + let scale = if let Some(",") = self.tokenizer.next() { + parse_i32( + self.tokenizer.next(), + "Expected scale, found None", + "Failed to parse scale for DECIMAL type", + )? + } else { + self.tokenizer.backtrack(); + 0 + }; + assert_token(self.tokenizer.next(), ")")?; + (precision, scale) + } else { + self.tokenizer.backtrack(); + (0, 0) + }; + PrimitiveLogicalType::Decimal(precision.try_into()?, scale.try_into()?) 
+ }, + "TIME" => { + let (unit, is_adjusted_to_utc) = if let Some("(") = self.tokenizer.next() { + let unit = parse_timeunit( + self.tokenizer.next(), + "Invalid timeunit found", + "Failed to parse timeunit for TIME type", + )?; + let is_adjusted_to_utc = if let Some(",") = self.tokenizer.next() { + parse_bool( + self.tokenizer.next(), + "Invalid boolean found", + "Failed to parse timezone info for TIME type", + )? + } else { + self.tokenizer.backtrack(); + false + }; + assert_token(self.tokenizer.next(), ")")?; + (unit, is_adjusted_to_utc) + } else { + self.tokenizer.backtrack(); + (TimeUnit::Milliseconds, false) + }; + PrimitiveLogicalType::Time { + is_adjusted_to_utc, + unit, + } + }, + "TIMESTAMP" => { + let (unit, is_adjusted_to_utc) = if let Some("(") = self.tokenizer.next() { + let unit = parse_timeunit( + self.tokenizer.next(), + "Invalid timeunit found", + "Failed to parse timeunit for TIMESTAMP type", + )?; + let is_adjusted_to_utc = if let Some(",") = self.tokenizer.next() { + parse_bool( + self.tokenizer.next(), + "Invalid boolean found", + "Failed to parse timezone info for TIMESTAMP type", + )? + } else { + // Invalid token for unit + self.tokenizer.backtrack(); + false + }; + assert_token(self.tokenizer.next(), ")")?; + (unit, is_adjusted_to_utc) + } else { + self.tokenizer.backtrack(); + (TimeUnit::Milliseconds, false) + }; + PrimitiveLogicalType::Timestamp { + is_adjusted_to_utc, + unit, + } + }, + "INTEGER" => { + let (bit_width, is_signed) = if let Some("(") = self.tokenizer.next() { + let bit_width = parse_i32( + self.tokenizer.next(), + "Invalid bit_width found", + "Failed to parse bit_width for INTEGER type", + )?; + let is_signed = if let Some(",") = self.tokenizer.next() { + parse_bool( + self.tokenizer.next(), + "Invalid boolean found", + "Failed to parse is_signed for INTEGER type", + )? 
+ } else { + // Invalid token for unit + self.tokenizer.backtrack(); + return Err(Error::oos("INTEGER requires sign")); + }; + assert_token(self.tokenizer.next(), ")")?; + (bit_width, is_signed) + } else { + // Invalid token for unit + self.tokenizer.backtrack(); + return Err(Error::oos("INTEGER requires width and sign")); + }; + PrimitiveLogicalType::Integer((bit_width, is_signed).into()) + }, + "STRING" => PrimitiveLogicalType::String, + "JSON" => PrimitiveLogicalType::Json, + "BSON" => PrimitiveLogicalType::Bson, + "UUID" => PrimitiveLogicalType::Uuid, + "UNKNOWN" => PrimitiveLogicalType::Unknown, + "INTERVAL" => return Err(Error::oos("Interval logical type not yet supported")), + _ => unreachable!(), + }) + } +} + +#[cfg(test)] +mod tests { + use types::{IntegerType, PrimitiveLogicalType}; + + use super::*; + use crate::parquet::schema::types::{GroupConvertedType, PhysicalType, PrimitiveConvertedType}; + + #[test] + fn test_tokenize_empty_string() { + assert_eq!(Tokenizer::from_str("").next(), None); + } + + #[test] + fn test_tokenize_delimiters() { + let mut iter = Tokenizer::from_str(",;{}()="); + assert_eq!(iter.next(), Some(",")); + assert_eq!(iter.next(), Some(";")); + assert_eq!(iter.next(), Some("{")); + assert_eq!(iter.next(), Some("}")); + assert_eq!(iter.next(), Some("(")); + assert_eq!(iter.next(), Some(")")); + assert_eq!(iter.next(), Some("=")); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_tokenize_delimiters_with_whitespaces() { + let mut iter = Tokenizer::from_str(" , ; { } ( ) = "); + assert_eq!(iter.next(), Some(",")); + assert_eq!(iter.next(), Some(";")); + assert_eq!(iter.next(), Some("{")); + assert_eq!(iter.next(), Some("}")); + assert_eq!(iter.next(), Some("(")); + assert_eq!(iter.next(), Some(")")); + assert_eq!(iter.next(), Some("=")); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_tokenize_words() { + let mut iter = Tokenizer::from_str("abc def ghi jkl mno"); + assert_eq!(iter.next(), Some("abc")); + assert_eq!(iter.next(), Some("def")); + assert_eq!(iter.next(), Some("ghi")); + assert_eq!(iter.next(), Some("jkl")); + assert_eq!(iter.next(), Some("mno")); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_tokenize_backtrack() { + let mut iter = Tokenizer::from_str("abc;"); + assert_eq!(iter.next(), Some("abc")); + assert_eq!(iter.next(), Some(";")); + iter.backtrack(); + assert_eq!(iter.next(), Some(";")); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_tokenize_message_type() { + let schema = " + message schema { + required int32 a; + optional binary c (UTF8); + required group d { + required int32 a; + optional binary c (UTF8); + } + required group e (LIST) { + repeated group list { + required int32 element; + } + } + } + "; + let iter = Tokenizer::from_str(schema); + let mut res = Vec::new(); + for token in iter { + res.push(token); + } + assert_eq!( + res, + vec![ + "message", "schema", "{", "required", "int32", "a", ";", "optional", "binary", "c", + "(", "UTF8", ")", ";", "required", "group", "d", "{", "required", "int32", "a", + ";", "optional", "binary", "c", "(", "UTF8", ")", ";", "}", "required", "group", + "e", "(", "LIST", ")", "{", "repeated", "group", "list", "{", "required", "int32", + "element", ";", "}", "}", "}" + ] + ); + } + + #[test] + fn test_assert_token() { + assert!(assert_token(Some("a"), "a").is_ok()); + assert!(assert_token(Some("a"), "b").is_err()); + assert!(assert_token(None, "b").is_err()); + } + + #[test] + fn test_parse_message_type_invalid() { + let mut iter = 
Tokenizer::from_str("test"); + let result = Parser { + tokenizer: &mut iter, + } + .parse_message_type(); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + "File out of specification: Message type does not start with 'message'" + ); + } + + #[test] + fn test_parse_message_type_no_name() { + let mut iter = Tokenizer::from_str("message"); + let result = Parser { + tokenizer: &mut iter, + } + .parse_message_type(); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + "File out of specification: Expected name, found None" + ); + } + + #[test] + fn test_parse_message_type_fixed_byte_array() { + let schema = " + message schema { + REQUIRED FIXED_LEN_BYTE_ARRAY col; + } + "; + let mut iter = Tokenizer::from_str(schema); + let result = Parser { + tokenizer: &mut iter, + } + .parse_message_type(); + assert!(result.is_err()); + + let schema = " + message schema { + REQUIRED FIXED_LEN_BYTE_ARRAY(16) col; + } + "; + let mut iter = Tokenizer::from_str(schema); + let result = Parser { + tokenizer: &mut iter, + } + .parse_message_type(); + assert!(result.is_ok()); + } + + #[test] + fn test_parse_message_type_decimal() { + // It is okay for decimal to omit precision and scale with right syntax. + // Here we test wrong syntax of decimal type + + // Invalid decimal syntax + let schema = " + message root { + optional int32 f1 (DECIMAL(); + } + "; + let mut iter = Tokenizer::from_str(schema); + let result = Parser { + tokenizer: &mut iter, + } + .parse_message_type(); + assert!(result.is_err()); + + // Invalid decimal, need precision and scale + let schema = " + message root { + optional int32 f1 (DECIMAL()); + } + "; + let mut iter = Tokenizer::from_str(schema); + let result = Parser { + tokenizer: &mut iter, + } + .parse_message_type(); + assert!(result.is_err()); + + // Invalid decimal because of `,` - has precision, needs scale + let schema = " + message root { + optional int32 f1 (DECIMAL(8,)); + } + "; + let mut iter = Tokenizer::from_str(schema); + let result = Parser { + tokenizer: &mut iter, + } + .parse_message_type(); + assert!(result.is_err()); + } + + #[test] + fn test_parse_decimal_wrong() { + // Invalid decimal because, we always require either precision or scale to be + // specified as part of converted type + let schema = " + message root { + optional int32 f3 (DECIMAL); + } + "; + let mut iter = Tokenizer::from_str(schema); + let result = Parser { + tokenizer: &mut iter, + } + .parse_message_type(); + assert!(result.is_err()); + + // Valid decimal (precision, scale) + let schema = " + message root { + optional int32 f1 (DECIMAL(8, 3)); + optional int32 f2 (DECIMAL(8)); + } + "; + let mut iter = Tokenizer::from_str(schema); + let result = Parser { + tokenizer: &mut iter, + } + .parse_message_type(); + assert!(result.is_ok()); + } + + #[test] + fn test_parse_message_type_compare_1() -> Result<()> { + let schema = " + message root { + optional fixed_len_byte_array(5) f1 (DECIMAL(9, 3)); + optional fixed_len_byte_array (16) f2 (DECIMAL (38, 18)); + } + "; + let mut iter = Tokenizer::from_str(schema); + let message = Parser { + tokenizer: &mut iter, + } + .parse_message_type() + .unwrap(); + + let fields = vec![ + ParquetType::try_from_primitive( + "f1".to_string(), + PhysicalType::FixedLenByteArray(5), + Repetition::Optional, + None, + Some(PrimitiveLogicalType::Decimal(9, 3)), + None, + )?, + ParquetType::try_from_primitive( + "f2".to_string(), + PhysicalType::FixedLenByteArray(16), + Repetition::Optional, + None, + 
Some(PrimitiveLogicalType::Decimal(38, 18)), + None, + )?, + ]; + + let expected = ParquetType::new_root("root".to_string(), fields); + + assert_eq!(message, expected); + Ok(()) + } + + #[test] + fn test_parse_message_type_compare_2() -> Result<()> { + let schema = " + message root { + required group a0 { + optional group a1 (LIST) { + repeated binary a2 (UTF8); + } + + optional group b1 (LIST) { + repeated group b2 { + optional int32 b3; + optional double b4; + } + } + } + } + "; + let mut iter = Tokenizer::from_str(schema); + let message = Parser { + tokenizer: &mut iter, + } + .parse_message_type() + .unwrap(); + + let a2 = ParquetType::try_from_primitive( + "a2".to_string(), + PhysicalType::ByteArray, + Repetition::Repeated, + Some(PrimitiveConvertedType::Utf8), + None, + None, + )?; + let a1 = ParquetType::from_converted( + "a1".to_string(), + vec![a2], + Repetition::Optional, + Some(GroupConvertedType::List), + None, + ); + let b2 = ParquetType::from_converted( + "b2".to_string(), + vec![ + ParquetType::from_physical("b3".to_string(), PhysicalType::Int32), + ParquetType::from_physical("b4".to_string(), PhysicalType::Double), + ], + Repetition::Repeated, + None, + None, + ); + let b1 = ParquetType::from_converted( + "b1".to_string(), + vec![b2], + Repetition::Optional, + Some(GroupConvertedType::List), + None, + ); + let a0 = ParquetType::from_converted( + "a0".to_string(), + vec![a1, b1], + Repetition::Required, + None, + None, + ); + + let expected = ParquetType::new_root("root".to_string(), vec![a0]); + + assert_eq!(message, expected); + Ok(()) + } + + #[test] + fn test_parse_message_type_compare_3() -> Result<()> { + let schema = " + message root { + required int32 _1 (INT_8); + required int32 _2 (INT_16); + required float _3; + required double _4; + optional int32 _5 (DATE); + optional binary _6 (UTF8); + } + "; + let mut iter = Tokenizer::from_str(schema); + let message = Parser { + tokenizer: &mut iter, + } + .parse_message_type() + .unwrap(); + + let f1 = ParquetType::try_from_primitive( + "_1".to_string(), + PhysicalType::Int32, + Repetition::Required, + Some(PrimitiveConvertedType::Int8), + None, + None, + )?; + let f2 = ParquetType::try_from_primitive( + "_2".to_string(), + PhysicalType::Int32, + Repetition::Required, + Some(PrimitiveConvertedType::Int16), + None, + None, + )?; + let f3 = ParquetType::try_from_primitive( + "_3".to_string(), + PhysicalType::Float, + Repetition::Required, + None, + None, + None, + )?; + let f4 = ParquetType::try_from_primitive( + "_4".to_string(), + PhysicalType::Double, + Repetition::Required, + None, + None, + None, + )?; + let f5 = ParquetType::try_from_primitive( + "_5".to_string(), + PhysicalType::Int32, + Repetition::Optional, + None, + Some(PrimitiveLogicalType::Date), + None, + )?; + let f6 = ParquetType::try_from_primitive( + "_6".to_string(), + PhysicalType::ByteArray, + Repetition::Optional, + Some(PrimitiveConvertedType::Utf8), + None, + None, + )?; + + let fields = vec![f1, f2, f3, f4, f5, f6]; + + let expected = ParquetType::new_root("root".to_string(), fields); + assert_eq!(message, expected); + Ok(()) + } + + #[test] + fn test_parse_message_type_compare_4() -> Result<()> { + let schema = " + message root { + required int32 _1 (INTEGER(8,true)); + required int32 _2 (INTEGER(16,false)); + required float _3; + required double _4; + optional int32 _5 (DATE); + optional int32 _6 (TIME(MILLIS,false)); + optional int64 _7 (TIME(MICROS,true)); + optional int64 _8 (TIMESTAMP(MILLIS,true)); + optional int64 _9 (TIMESTAMP(NANOS,false)); 
+ optional binary _10 (STRING); + } + "; + let mut iter = Tokenizer::from_str(schema); + let message = Parser { + tokenizer: &mut iter, + } + .parse_message_type()?; + + let f1 = ParquetType::try_from_primitive( + "_1".to_string(), + PhysicalType::Int32, + Repetition::Required, + None, + Some(PrimitiveLogicalType::Integer(IntegerType::Int8)), + None, + )?; + let f2 = ParquetType::try_from_primitive( + "_2".to_string(), + PhysicalType::Int32, + Repetition::Required, + None, + Some(PrimitiveLogicalType::Integer(IntegerType::UInt16)), + None, + )?; + let f3 = ParquetType::try_from_primitive( + "_3".to_string(), + PhysicalType::Float, + Repetition::Required, + None, + None, + None, + )?; + let f4 = ParquetType::try_from_primitive( + "_4".to_string(), + PhysicalType::Double, + Repetition::Required, + None, + None, + None, + )?; + let f5 = ParquetType::try_from_primitive( + "_5".to_string(), + PhysicalType::Int32, + Repetition::Optional, + None, + Some(PrimitiveLogicalType::Date), + None, + )?; + let f6 = ParquetType::try_from_primitive( + "_6".to_string(), + PhysicalType::Int32, + Repetition::Optional, + None, + Some(PrimitiveLogicalType::Time { + is_adjusted_to_utc: false, + unit: TimeUnit::Milliseconds, + }), + None, + )?; + let f7 = ParquetType::try_from_primitive( + "_7".to_string(), + PhysicalType::Int64, + Repetition::Optional, + None, + Some(PrimitiveLogicalType::Time { + is_adjusted_to_utc: true, + unit: TimeUnit::Microseconds, + }), + None, + )?; + let f8 = ParquetType::try_from_primitive( + "_8".to_string(), + PhysicalType::Int64, + Repetition::Optional, + None, + Some(PrimitiveLogicalType::Timestamp { + is_adjusted_to_utc: true, + unit: TimeUnit::Milliseconds, + }), + None, + )?; + let f9 = ParquetType::try_from_primitive( + "_9".to_string(), + PhysicalType::Int64, + Repetition::Optional, + None, + Some(PrimitiveLogicalType::Timestamp { + is_adjusted_to_utc: false, + unit: TimeUnit::Nanoseconds, + }), + None, + )?; + + let f10 = ParquetType::try_from_primitive( + "_10".to_string(), + PhysicalType::ByteArray, + Repetition::Optional, + None, + Some(PrimitiveLogicalType::String), + None, + )?; + + let fields = vec![f1, f2, f3, f4, f5, f6, f7, f8, f9, f10]; + + let expected = ParquetType::new_root("root".to_string(), fields); + assert_eq!(message, expected); + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/schema/io_message/mod.rs b/crates/polars-parquet/src/parquet/schema/io_message/mod.rs new file mode 100644 index 000000000000..1e296a7f3724 --- /dev/null +++ b/crates/polars-parquet/src/parquet/schema/io_message/mod.rs @@ -0,0 +1,3 @@ +mod from_message; + +pub use from_message::from_message; diff --git a/crates/polars-parquet/src/parquet/schema/io_thrift/from_thrift.rs b/crates/polars-parquet/src/parquet/schema/io_thrift/from_thrift.rs new file mode 100644 index 000000000000..b99c0881fb89 --- /dev/null +++ b/crates/polars-parquet/src/parquet/schema/io_thrift/from_thrift.rs @@ -0,0 +1,134 @@ +use parquet_format_safe::SchemaElement; + +use super::super::types::ParquetType; +use crate::parquet::error::{Error, Result}; +use crate::parquet::schema::types::FieldInfo; + +impl ParquetType { + /// Method to convert from Thrift. 
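+    ///
+    /// Illustrative sketch (not part of the original docs): rebuilding a schema from
+    /// the flat element list produced by `to_thrift` (both methods are defined in this
+    /// crate; `schema` is a hypothetical `ParquetType`):
+    ///
+    /// ```ignore
+    /// let elements = schema.to_thrift();
+    /// let roundtripped = ParquetType::try_from_thrift(&elements)?;
+    /// assert_eq!(roundtripped, schema);
+    /// ```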
+ pub fn try_from_thrift(elements: &[SchemaElement]) -> Result { + let mut index = 0; + let mut schema_nodes = Vec::new(); + while index < elements.len() { + let t = from_thrift_helper(elements, index)?; + index = t.0; + schema_nodes.push(t.1); + } + if schema_nodes.len() != 1 { + return Err(Error::oos(format!( + "Expected exactly one root node, but found {}", + schema_nodes.len() + ))); + } + + Ok(schema_nodes.remove(0)) + } +} + +/// Constructs a new Type from the `elements`, starting at index `index`. +/// The first result is the starting index for the next Type after this one. If it is +/// equal to `elements.len()`, then this Type is the last one. +/// The second result is the result Type. +fn from_thrift_helper(elements: &[SchemaElement], index: usize) -> Result<(usize, ParquetType)> { + // Whether or not the current node is root (message type). + // There is only one message type node in the schema tree. + let is_root_node = index == 0; + + let element = elements + .get(index) + .ok_or_else(|| Error::oos(format!("index {} on SchemaElement is not valid", index)))?; + let name = element.name.clone(); + let converted_type = element.converted_type; + + let id = element.field_id; + match element.num_children { + // From parquet-format: + // The children count is used to construct the nested relationship. + // This field is not set when the element is a primitive type + // Sometimes parquet-cpp sets num_children field to 0 for primitive types, so we + // have to handle this case too. + None | Some(0) => { + // primitive type + let repetition = element + .repetition_type + .ok_or_else(|| Error::oos("Repetition level must be defined for a primitive type"))? + .try_into()?; + let physical_type = element + .type_ + .ok_or_else(|| Error::oos("Physical type must be defined for a primitive type"))?; + + let converted_type = converted_type + .map(|converted_type| { + let maybe_decimal = match (element.precision, element.scale) { + (Some(precision), Some(scale)) => Some((precision, scale)), + (None, None) => None, + _ => { + return Err(Error::oos( + "When precision or scale are defined, both must be defined", + )) + }, + }; + (converted_type, maybe_decimal).try_into() + }) + .transpose()?; + + let logical_type = element + .logical_type + .clone() + .map(|x| x.try_into()) + .transpose()?; + + let tp = ParquetType::try_from_primitive( + name, + (physical_type, element.type_length).try_into()?, + repetition, + converted_type, + logical_type, + id, + )?; + + Ok((index + 1, tp)) + }, + Some(n) => { + let mut fields = vec![]; + let mut next_index = index + 1; + for _ in 0..n { + let child_result = from_thrift_helper(elements, next_index)?; + next_index = child_result.0; + fields.push(child_result.1); + } + + let tp = if is_root_node { + ParquetType::new_root(name, fields) + } else { + let repetition = if let Some(repetition) = element.repetition_type { + repetition.try_into()? 
+ } else { + return Err(Error::oos( + "The repetition level of a non-root must be non-null", + )); + }; + + let converted_type = converted_type.map(|x| x.try_into()).transpose()?; + + let logical_type = element + .logical_type + .clone() + .map(|x| x.try_into()) + .transpose()?; + + ParquetType::GroupType { + field_info: FieldInfo { + name, + repetition, + id, + }, + fields, + converted_type, + logical_type, + } + }; + Ok((next_index, tp)) + }, + } +} diff --git a/crates/polars-parquet/src/parquet/schema/io_thrift/mod.rs b/crates/polars-parquet/src/parquet/schema/io_thrift/mod.rs new file mode 100644 index 000000000000..5176eb131ff2 --- /dev/null +++ b/crates/polars-parquet/src/parquet/schema/io_thrift/mod.rs @@ -0,0 +1,85 @@ +mod from_thrift; +pub use from_thrift::*; + +mod to_thrift; +pub use to_thrift::*; + +#[cfg(test)] +mod tests { + use crate::parquet::error::Result; + use crate::parquet::schema::io_message::from_message; + use crate::parquet::schema::types::ParquetType; + + fn test_round_trip(message: &str) -> Result<()> { + let expected_schema = from_message(message)?; + let thrift_schema = expected_schema.to_thrift(); + let thrift_schema = thrift_schema.into_iter().collect::>(); + let result_schema = ParquetType::try_from_thrift(&thrift_schema)?; + assert_eq!(result_schema, expected_schema); + Ok(()) + } + + #[test] + fn test_schema_type_thrift_conversion() { + let message_type = " + message conversions { + REQUIRED INT64 id; + OPTIONAL group int_array_Array (LIST) { + REPEATED group list { + OPTIONAL group element (LIST) { + REPEATED group list { + OPTIONAL INT32 element; + } + } + } + } + OPTIONAL group int_map (MAP) { + REPEATED group map (MAP_KEY_VALUE) { + REQUIRED BYTE_ARRAY key (UTF8); + OPTIONAL INT32 value; + } + } + OPTIONAL group int_Map_Array (LIST) { + REPEATED group list { + OPTIONAL group g (MAP) { + REPEATED group map (MAP_KEY_VALUE) { + REQUIRED BYTE_ARRAY key (UTF8); + OPTIONAL group value { + OPTIONAL group H { + OPTIONAL group i (LIST) { + REPEATED group list { + OPTIONAL DOUBLE element; + } + } + } + } + } + } + } + } + OPTIONAL group nested_struct { + OPTIONAL INT32 A; + OPTIONAL group b (LIST) { + REPEATED group list { + REQUIRED FIXED_LEN_BYTE_ARRAY (16) element; + } + } + } + } + "; + test_round_trip(message_type).unwrap(); + } + + #[test] + fn test_schema_type_thrift_conversion_decimal() { + let message_type = " + message decimals { + OPTIONAL INT32 field0; + OPTIONAL INT64 field1 (DECIMAL (18, 2)); + OPTIONAL FIXED_LEN_BYTE_ARRAY (16) field2 (DECIMAL (38, 18)); + OPTIONAL BYTE_ARRAY field3 (DECIMAL (9)); + } + "; + test_round_trip(message_type).unwrap(); + } +} diff --git a/crates/polars-parquet/src/parquet/schema/io_thrift/to_thrift.rs b/crates/polars-parquet/src/parquet/schema/io_thrift/to_thrift.rs new file mode 100644 index 000000000000..27c9d886b2ef --- /dev/null +++ b/crates/polars-parquet/src/parquet/schema/io_thrift/to_thrift.rs @@ -0,0 +1,82 @@ +use parquet_format_safe::{ConvertedType, SchemaElement}; + +use super::super::types::ParquetType; +use crate::parquet::schema::types::PrimitiveType; + +impl ParquetType { + /// Method to convert to Thrift. + pub(crate) fn to_thrift(&self) -> Vec { + let mut elements: Vec = Vec::new(); + to_thrift_helper(self, &mut elements, true); + elements + } +} + +/// Constructs list of `SchemaElement` from the schema using depth-first traversal. +/// Here we assume that schema is always valid and starts with group type. 
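+///
+/// Illustrative sketch (not part of the original docs), using `from_message` from this
+/// crate to build the input:
+///
+/// ```ignore
+/// let root = from_message(
+///     "message root { required int32 a; optional group g { optional binary b; } }",
+/// )?;
+/// let elements = root.to_thrift();
+/// // Depth-first order: root, a, g, b.
+/// assert_eq!(elements.len(), 4);
+/// assert_eq!(elements[0].num_children, Some(2)); // the root has two children
+/// assert_eq!(elements[2].num_children, Some(1)); // the group `g` has one child
+/// ```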
+fn to_thrift_helper(schema: &ParquetType, elements: &mut Vec, is_root: bool) { + match schema { + ParquetType::PrimitiveType(PrimitiveType { + field_info, + logical_type, + converted_type, + physical_type, + }) => { + let (type_, type_length) = (*physical_type).into(); + let (converted_type, maybe_decimal) = converted_type + .map(|x| x.into()) + .map(|x: (ConvertedType, Option<(i32, i32)>)| (Some(x.0), x.1)) + .unwrap_or((None, None)); + + let element = SchemaElement { + type_: Some(type_), + type_length, + repetition_type: Some(field_info.repetition.into()), + name: field_info.name.clone(), + num_children: None, + converted_type, + precision: maybe_decimal.map(|x| x.0), + scale: maybe_decimal.map(|x| x.1), + field_id: field_info.id, + logical_type: logical_type.map(|x| x.into()), + }; + + elements.push(element); + }, + ParquetType::GroupType { + field_info, + fields, + logical_type, + converted_type, + } => { + let converted_type = converted_type.map(|x| x.into()); + + let repetition_type = if is_root { + // https://github.com/apache/parquet-format/blob/7f06e838cbd1b7dbd722ff2580b9c2525e37fc46/src/main/thrift/parquet.thrift#L363 + None + } else { + Some(field_info.repetition) + }; + + let element = SchemaElement { + type_: None, + type_length: None, + repetition_type: repetition_type.map(|x| x.into()), + name: field_info.name.clone(), + num_children: Some(fields.len() as i32), + converted_type, + scale: None, + precision: None, + field_id: field_info.id, + logical_type: logical_type.map(|x| x.into()), + }; + + elements.push(element); + + // Add child elements for a group + for field in fields { + to_thrift_helper(field, elements, false); + } + }, + } +} diff --git a/crates/polars-parquet/src/parquet/schema/mod.rs b/crates/polars-parquet/src/parquet/schema/mod.rs new file mode 100644 index 000000000000..af1918afa7f9 --- /dev/null +++ b/crates/polars-parquet/src/parquet/schema/mod.rs @@ -0,0 +1,7 @@ +pub use super::thrift_format::SchemaElement; +pub use crate::parquet::parquet_bridge::Repetition; + +pub mod io_message; +pub mod io_thrift; + +pub mod types; diff --git a/crates/polars-parquet/src/parquet/schema/types/basic_type.rs b/crates/polars-parquet/src/parquet/schema/types/basic_type.rs new file mode 100644 index 000000000000..b3697fcaa1c3 --- /dev/null +++ b/crates/polars-parquet/src/parquet/schema/types/basic_type.rs @@ -0,0 +1,16 @@ +#[cfg(feature = "serde_types")] +use serde::{Deserialize, Serialize}; + +use super::super::Repetition; + +/// Common type information. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub struct FieldInfo { + /// The field name + pub name: String, + /// The repetition + pub repetition: Repetition, + /// the optional id, to select fields by id + pub id: Option, +} diff --git a/crates/polars-parquet/src/parquet/schema/types/converted_type.rs b/crates/polars-parquet/src/parquet/schema/types/converted_type.rs new file mode 100644 index 000000000000..078d2324574c --- /dev/null +++ b/crates/polars-parquet/src/parquet/schema/types/converted_type.rs @@ -0,0 +1,238 @@ +use parquet_format_safe::ConvertedType; +#[cfg(feature = "serde_types")] +use serde::{Deserialize, Serialize}; + +use crate::parquet::error::Error; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub enum PrimitiveConvertedType { + Utf8, + /// an enum is converted into a binary field + Enum, + /// A decimal value. 
+    ///
+    /// This may be used to annotate binary or fixed primitive types. The
+    /// underlying byte array stores the unscaled value encoded as two's
+    /// complement using big-endian byte order (the most significant byte is the
+    /// zeroth element). The value of the decimal is the value * 10^{-scale}.
+    ///
+    /// This must be accompanied by a (maximum) precision and a scale in the
+    /// SchemaElement. The precision specifies the number of digits in the decimal
+    /// and the scale stores the location of the decimal point. For example 1.23
+    /// would have precision 3 (3 total digits) and scale 2 (the decimal point is
+    /// 2 digits over).
+    // (precision, scale)
+    Decimal(usize, usize),
+    /// A Date
+    ///
+    /// Stored as days since Unix epoch, encoded as the INT32 physical type.
+    ///
+    Date,
+    /// A time
+    ///
+    /// The total number of milliseconds since midnight. The value is stored
+    /// as an INT32 physical type.
+    TimeMillis,
+    /// A time.
+    ///
+    /// The total number of microseconds since midnight. The value is stored as
+    /// an INT64 physical type.
+    TimeMicros,
+    /// A date/time combination
+    ///
+    /// Date and time recorded as milliseconds since the Unix epoch. Recorded as
+    /// a physical type of INT64.
+    TimestampMillis,
+    /// A date/time combination
+    ///
+    /// Date and time recorded as microseconds since the Unix epoch. The value is
+    /// stored as an INT64 physical type.
+    TimestampMicros,
+    /// An unsigned integer value.
+    ///
+    /// The number describes the maximum number of meaningful data bits in
+    /// the stored value. 8, 16 and 32 bit values are stored using the
+    /// INT32 physical type. 64 bit values are stored using the INT64
+    /// physical type.
+    ///
+    Uint8,
+    Uint16,
+    Uint32,
+    Uint64,
+    /// A signed integer value.
+    ///
+    /// The number describes the maximum number of meaningful data bits in
+    /// the stored value. 8, 16 and 32 bit values are stored using the
+    /// INT32 physical type. 64 bit values are stored using the INT64
+    /// physical type.
+    ///
+    Int8,
+    Int16,
+    Int32,
+    Int64,
+    /// An embedded JSON document
+    ///
+    /// A JSON document embedded within a single UTF8 column.
+    Json,
+    /// An embedded BSON document
+    ///
+    /// A BSON document embedded within a single BINARY column.
+    Bson,
+    /// An interval of time
+    ///
+    /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12.
+    /// This data is composed of three separate little endian unsigned
+    /// integers. Each stores a component of a duration of time. The first
+    /// integer identifies the number of months associated with the duration,
+    /// the second identifies the number of days associated with the duration
+    /// and the third identifies the number of milliseconds associated with
+    /// the provided duration. This duration of time is independent of any
+    /// particular timezone or date.
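+    ///
+    /// Illustrative sketch (not part of the original docs): decoding the three
+    /// little-endian components from such a 12-byte value (assuming `data: &[u8]`
+    /// with `data.len() == 12`):
+    ///
+    /// ```ignore
+    /// let months = u32::from_le_bytes(data[0..4].try_into().unwrap());
+    /// let days = u32::from_le_bytes(data[4..8].try_into().unwrap());
+    /// let millis = u32::from_le_bytes(data[8..12].try_into().unwrap());
+    /// ```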
+ Interval, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub enum GroupConvertedType { + /// a map is converted as an optional field containing a repeated key/value pair + Map, + /// a key/value pair is converted into a group of two fields + MapKeyValue, + /// a list is converted into an optional field containing a repeated field for its + /// values + List, +} + +impl TryFrom<(ConvertedType, Option<(i32, i32)>)> for PrimitiveConvertedType { + type Error = Error; + + fn try_from( + (ty, maybe_decimal): (ConvertedType, Option<(i32, i32)>), + ) -> Result { + use PrimitiveConvertedType::*; + Ok(match ty { + ConvertedType::UTF8 => Utf8, + ConvertedType::ENUM => Enum, + ConvertedType::DECIMAL => { + if let Some((precision, scale)) = maybe_decimal { + Decimal(precision.try_into()?, scale.try_into()?) + } else { + return Err(Error::oos("Decimal requires a precision and scale")); + } + }, + ConvertedType::DATE => Date, + ConvertedType::TIME_MILLIS => TimeMillis, + ConvertedType::TIME_MICROS => TimeMicros, + ConvertedType::TIMESTAMP_MILLIS => TimestampMillis, + ConvertedType::TIMESTAMP_MICROS => TimestampMicros, + ConvertedType::UINT_8 => Uint8, + ConvertedType::UINT_16 => Uint16, + ConvertedType::UINT_32 => Uint32, + ConvertedType::UINT_64 => Uint64, + ConvertedType::INT_8 => Int8, + ConvertedType::INT_16 => Int16, + ConvertedType::INT_32 => Int32, + ConvertedType::INT_64 => Int64, + ConvertedType::JSON => Json, + ConvertedType::BSON => Bson, + ConvertedType::INTERVAL => Interval, + _ => { + return Err(Error::oos(format!( + "Converted type \"{:?}\" cannot be applied to a primitive type", + ty + ))) + }, + }) + } +} + +impl TryFrom for GroupConvertedType { + type Error = Error; + + fn try_from(type_: ConvertedType) -> Result { + Ok(match type_ { + ConvertedType::LIST => GroupConvertedType::List, + ConvertedType::MAP => GroupConvertedType::Map, + ConvertedType::MAP_KEY_VALUE => GroupConvertedType::MapKeyValue, + _ => return Err(Error::oos("LogicalType value out of range")), + }) + } +} + +impl From for ConvertedType { + fn from(type_: GroupConvertedType) -> Self { + match type_ { + GroupConvertedType::Map => ConvertedType::MAP, + GroupConvertedType::List => ConvertedType::LIST, + GroupConvertedType::MapKeyValue => ConvertedType::MAP_KEY_VALUE, + } + } +} + +impl From for (ConvertedType, Option<(i32, i32)>) { + fn from(ty: PrimitiveConvertedType) -> Self { + use PrimitiveConvertedType::*; + match ty { + Utf8 => (ConvertedType::UTF8, None), + Enum => (ConvertedType::ENUM, None), + Decimal(precision, scale) => ( + ConvertedType::DECIMAL, + Some((precision as i32, scale as i32)), + ), + Date => (ConvertedType::DATE, None), + TimeMillis => (ConvertedType::TIME_MILLIS, None), + TimeMicros => (ConvertedType::TIME_MICROS, None), + TimestampMillis => (ConvertedType::TIMESTAMP_MILLIS, None), + TimestampMicros => (ConvertedType::TIMESTAMP_MICROS, None), + Uint8 => (ConvertedType::UINT_8, None), + Uint16 => (ConvertedType::UINT_16, None), + Uint32 => (ConvertedType::UINT_32, None), + Uint64 => (ConvertedType::UINT_64, None), + Int8 => (ConvertedType::INT_8, None), + Int16 => (ConvertedType::INT_16, None), + Int32 => (ConvertedType::INT_32, None), + Int64 => (ConvertedType::INT_64, None), + Json => (ConvertedType::JSON, None), + Bson => (ConvertedType::BSON, None), + Interval => (ConvertedType::INTERVAL, None), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn round_trip() -> Result<(), Error> { + use 
PrimitiveConvertedType::*; + let a = vec![ + Utf8, + Enum, + Decimal(3, 1), + Date, + TimeMillis, + TimeMicros, + TimestampMillis, + TimestampMicros, + Uint8, + Uint16, + Uint32, + Uint64, + Int8, + Int16, + Int32, + Int64, + Json, + Bson, + Interval, + ]; + for a in a { + let (c, d): (ConvertedType, Option<(i32, i32)>) = a.into(); + let e: PrimitiveConvertedType = (c, d).try_into()?; + assert_eq!(e, a); + } + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/schema/types/mod.rs b/crates/polars-parquet/src/parquet/schema/types/mod.rs new file mode 100644 index 000000000000..0516d75069bb --- /dev/null +++ b/crates/polars-parquet/src/parquet/schema/types/mod.rs @@ -0,0 +1,17 @@ +mod spec; + +mod physical_type; +pub use physical_type::*; + +mod basic_type; +pub use basic_type::*; + +mod converted_type; +pub use converted_type::*; + +mod parquet_type; +pub use parquet_type::*; + +pub use crate::parquet::parquet_bridge::{ + GroupLogicalType, IntegerType, PrimitiveLogicalType, TimeUnit, +}; diff --git a/crates/polars-parquet/src/parquet/schema/types/parquet_type.rs b/crates/polars-parquet/src/parquet/schema/types/parquet_type.rs new file mode 100644 index 000000000000..010f23ccde3a --- /dev/null +++ b/crates/polars-parquet/src/parquet/schema/types/parquet_type.rs @@ -0,0 +1,206 @@ +// see https://github.com/apache/parquet-format/blob/master/LogicalTypes.md +use polars_utils::aliases::*; +#[cfg(feature = "serde_types")] +use serde::{Deserialize, Serialize}; + +use super::super::Repetition; +use super::{ + spec, FieldInfo, GroupConvertedType, GroupLogicalType, PhysicalType, PrimitiveConvertedType, + PrimitiveLogicalType, +}; +use crate::parquet::error::Result; + +/// The complete description of a parquet column +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub struct PrimitiveType { + /// The fields' generic information + pub field_info: FieldInfo, + /// The optional logical type + pub logical_type: Option, + /// The optional converted type + pub converted_type: Option, + /// The physical type + pub physical_type: PhysicalType, +} + +impl PrimitiveType { + /// Helper method to create an optional field with no logical or converted types. + pub fn from_physical(name: String, physical_type: PhysicalType) -> Self { + let field_info = FieldInfo { + name, + repetition: Repetition::Optional, + id: None, + }; + Self { + field_info, + converted_type: None, + logical_type: None, + physical_type, + } + } +} + +/// Representation of a Parquet type describing primitive and nested fields, +/// including the top-level schema of the parquet file. +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub enum ParquetType { + PrimitiveType(PrimitiveType), + GroupType { + field_info: FieldInfo, + logical_type: Option, + converted_type: Option, + fields: Vec, + }, +} + +/// Accessors +impl ParquetType { + /// Returns [`FieldInfo`] information about the type. + pub fn get_field_info(&self) -> &FieldInfo { + match self { + Self::PrimitiveType(primitive) => &primitive.field_info, + Self::GroupType { field_info, .. } => field_info, + } + } + + /// Returns this type's field name. + pub fn name(&self) -> &str { + &self.get_field_info().name + } + + /// Checks if `sub_type` schema is part of current schema. + /// This method can be used to check if projected columns are part of the root schema. 
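+    ///
+    /// Illustrative sketch (not part of the original docs), assuming `from_message`
+    /// from this crate is in scope:
+    ///
+    /// ```ignore
+    /// let full = from_message("message root { required int32 a; optional binary b (UTF8); }")?;
+    /// let projection = from_message("message root { required int32 a; }")?;
+    /// assert!(full.check_contains(&projection));
+    /// ```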
+ pub fn check_contains(&self, sub_type: &ParquetType) -> bool { + let basic_match = self.get_field_info() == sub_type.get_field_info(); + + match (self, sub_type) { + ( + Self::PrimitiveType(PrimitiveType { physical_type, .. }), + Self::PrimitiveType(PrimitiveType { + physical_type: other_physical_type, + .. + }), + ) => basic_match && physical_type == other_physical_type, + ( + Self::GroupType { fields, .. }, + Self::GroupType { + fields: other_fields, + .. + }, + ) => { + // build hashmap of name -> Type + let mut field_map = PlHashMap::new(); + for field in fields { + field_map.insert(field.name(), field); + } + + for field in other_fields { + if !field_map + .get(field.name()) + .map(|tpe| tpe.check_contains(field)) + .unwrap_or(false) + { + return false; + } + } + true + }, + _ => false, + } + } +} + +/// Constructors +impl ParquetType { + pub(crate) fn new_root(name: String, fields: Vec) -> Self { + let field_info = FieldInfo { + name, + repetition: Repetition::Optional, + id: None, + }; + ParquetType::GroupType { + field_info, + fields, + logical_type: None, + converted_type: None, + } + } + + pub fn from_converted( + name: String, + fields: Vec, + repetition: Repetition, + converted_type: Option, + id: Option, + ) -> Self { + let field_info = FieldInfo { + name, + repetition, + id, + }; + + ParquetType::GroupType { + field_info, + fields, + converted_type, + logical_type: None, + } + } + + /// # Error + /// Errors iff the combination of physical, logical and converted type is not valid. + pub fn try_from_primitive( + name: String, + physical_type: PhysicalType, + repetition: Repetition, + converted_type: Option, + logical_type: Option, + id: Option, + ) -> Result { + spec::check_converted_invariants(&physical_type, &converted_type)?; + spec::check_logical_invariants(&physical_type, &logical_type)?; + + let field_info = FieldInfo { + name, + repetition, + id, + }; + + Ok(ParquetType::PrimitiveType(PrimitiveType { + field_info, + converted_type, + logical_type, + physical_type, + })) + } + + /// Helper method to create a [`ParquetType::PrimitiveType`] optional field + /// with no logical or converted types. 
+ pub fn from_physical(name: String, physical_type: PhysicalType) -> Self { + ParquetType::PrimitiveType(PrimitiveType::from_physical(name, physical_type)) + } + + pub fn from_group( + name: String, + repetition: Repetition, + converted_type: Option, + logical_type: Option, + fields: Vec, + id: Option, + ) -> Self { + let field_info = FieldInfo { + name, + repetition, + id, + }; + + ParquetType::GroupType { + field_info, + logical_type, + converted_type, + fields, + } + } +} diff --git a/crates/polars-parquet/src/parquet/schema/types/physical_type.rs b/crates/polars-parquet/src/parquet/schema/types/physical_type.rs new file mode 100644 index 000000000000..ad576935a049 --- /dev/null +++ b/crates/polars-parquet/src/parquet/schema/types/physical_type.rs @@ -0,0 +1,58 @@ +use parquet_format_safe::Type; +#[cfg(feature = "serde_types")] +use serde::{Deserialize, Serialize}; + +use crate::parquet::error::Error; + +/// The set of all physical types representable in Parquet +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))] +pub enum PhysicalType { + Boolean, + Int32, + Int64, + Int96, + Float, + Double, + ByteArray, + FixedLenByteArray(usize), +} + +impl TryFrom<(Type, Option)> for PhysicalType { + type Error = Error; + + fn try_from((type_, length): (Type, Option)) -> Result { + Ok(match type_ { + Type::BOOLEAN => PhysicalType::Boolean, + Type::INT32 => PhysicalType::Int32, + Type::INT64 => PhysicalType::Int64, + Type::INT96 => PhysicalType::Int96, + Type::FLOAT => PhysicalType::Float, + Type::DOUBLE => PhysicalType::Double, + Type::BYTE_ARRAY => PhysicalType::ByteArray, + Type::FIXED_LEN_BYTE_ARRAY => { + let length = length + .ok_or_else(|| Error::oos("Length must be defined for FixedLenByteArray"))?; + PhysicalType::FixedLenByteArray(length.try_into()?) 
+ }, + _ => return Err(Error::oos("Unknown type")), + }) + } +} + +impl From for (Type, Option) { + fn from(physical_type: PhysicalType) -> Self { + match physical_type { + PhysicalType::Boolean => (Type::BOOLEAN, None), + PhysicalType::Int32 => (Type::INT32, None), + PhysicalType::Int64 => (Type::INT64, None), + PhysicalType::Int96 => (Type::INT96, None), + PhysicalType::Float => (Type::FLOAT, None), + PhysicalType::Double => (Type::DOUBLE, None), + PhysicalType::ByteArray => (Type::BYTE_ARRAY, None), + PhysicalType::FixedLenByteArray(length) => { + (Type::FIXED_LEN_BYTE_ARRAY, Some(length as i32)) + }, + } + } +} diff --git a/crates/polars-parquet/src/parquet/schema/types/spec.rs b/crates/polars-parquet/src/parquet/schema/types/spec.rs new file mode 100644 index 000000000000..806048bb5065 --- /dev/null +++ b/crates/polars-parquet/src/parquet/schema/types/spec.rs @@ -0,0 +1,181 @@ +// see https://github.com/apache/parquet-format/blob/master/LogicalTypes.md +use super::{IntegerType, PhysicalType, PrimitiveConvertedType, PrimitiveLogicalType, TimeUnit}; +use crate::parquet::error::{Error, Result}; + +fn check_decimal_invariants( + physical_type: &PhysicalType, + precision: usize, + scale: usize, +) -> Result<()> { + if precision < 1 { + return Err(Error::oos(format!( + "DECIMAL precision must be larger than 0; It is {}", + precision, + ))); + } + if scale > precision { + return Err(Error::oos(format!( + "Invalid DECIMAL: scale ({}) cannot be greater than precision \ + ({})", + scale, precision + ))); + } + + match physical_type { + PhysicalType::Int32 => { + if !(1..=9).contains(&precision) { + return Err(Error::oos(format!( + "Cannot represent INT32 as DECIMAL with precision {}", + precision + ))); + } + }, + PhysicalType::Int64 => { + if !(1..=18).contains(&precision) { + return Err(Error::oos(format!( + "Cannot represent INT64 as DECIMAL with precision {}", + precision + ))); + } + }, + PhysicalType::FixedLenByteArray(length) => { + let oos_error = || Error::oos(format!("Byte Array length {} out of spec", length)); + let max_precision = (2f64.powi( + (*length as i32) + .checked_mul(8) + .ok_or_else(oos_error)? + .checked_sub(1) + .ok_or_else(oos_error)?, + ) - 1f64) + .log10() + .floor() as usize; + + if precision > max_precision { + return Err(Error::oos(format!( + "Cannot represent FIXED_LEN_BYTE_ARRAY as DECIMAL with length {} and \ + precision {}. 
The max precision can only be {}", + length, precision, max_precision + ))); + } + }, + PhysicalType::ByteArray => {}, + _ => { + return Err(Error::oos( + "DECIMAL can only annotate INT32, INT64, BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY" + .to_string(), + )) + }, + }; + Ok(()) +} + +pub fn check_converted_invariants( + physical_type: &PhysicalType, + converted_type: &Option, +) -> Result<()> { + if converted_type.is_none() { + return Ok(()); + }; + let converted_type = converted_type.as_ref().unwrap(); + + use PrimitiveConvertedType::*; + match converted_type { + Utf8 | Bson | Json => { + if physical_type != &PhysicalType::ByteArray { + return Err(Error::oos(format!( + "{:?} can only annotate BYTE_ARRAY fields", + converted_type + ))); + } + }, + Decimal(precision, scale) => { + check_decimal_invariants(physical_type, *precision, *scale)?; + }, + Date | TimeMillis | Uint8 | Uint16 | Uint32 | Int8 | Int16 | Int32 => { + if physical_type != &PhysicalType::Int32 { + return Err(Error::oos(format!( + "{:?} can only annotate INT32", + converted_type + ))); + } + }, + TimeMicros | TimestampMillis | TimestampMicros | Uint64 | Int64 => { + if physical_type != &PhysicalType::Int64 { + return Err(Error::oos(format!( + "{:?} can only annotate INT64", + converted_type + ))); + } + }, + Interval => { + if physical_type != &PhysicalType::FixedLenByteArray(12) { + return Err(Error::oos( + "INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)".to_string(), + )); + } + }, + Enum => { + if physical_type != &PhysicalType::ByteArray { + return Err(Error::oos( + "ENUM can only annotate BYTE_ARRAY fields".to_string(), + )); + } + }, + }; + Ok(()) +} + +pub fn check_logical_invariants( + physical_type: &PhysicalType, + logical_type: &Option, +) -> Result<()> { + if logical_type.is_none() { + return Ok(()); + }; + let logical_type = logical_type.unwrap(); + + // Check that logical type and physical type are compatible + use PrimitiveLogicalType::*; + match (logical_type, physical_type) { + (Enum, PhysicalType::ByteArray) => {}, + (Decimal(precision, scale), _) => { + check_decimal_invariants(physical_type, precision, scale)?; + }, + (Date, PhysicalType::Int32) => {}, + ( + Time { + unit: TimeUnit::Milliseconds, + .. + }, + PhysicalType::Int32, + ) => {}, + (Time { unit, .. }, PhysicalType::Int64) => { + if unit == TimeUnit::Milliseconds { + return Err(Error::oos( + "Cannot use millisecond unit on INT64 type".to_string(), + )); + } + }, + (Timestamp { .. 
}, PhysicalType::Int64) => {}, + (Integer(IntegerType::Int8), PhysicalType::Int32) => {}, + (Integer(IntegerType::Int16), PhysicalType::Int32) => {}, + (Integer(IntegerType::Int32), PhysicalType::Int32) => {}, + (Integer(IntegerType::UInt8), PhysicalType::Int32) => {}, + (Integer(IntegerType::UInt16), PhysicalType::Int32) => {}, + (Integer(IntegerType::UInt32), PhysicalType::Int32) => {}, + (Integer(IntegerType::UInt64), PhysicalType::Int64) => {}, + (Integer(IntegerType::Int64), PhysicalType::Int64) => {}, + // Null type + (Unknown, PhysicalType::Int32) => {}, + (String | Json | Bson, PhysicalType::ByteArray) => {}, + // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#uuid + (Uuid, PhysicalType::FixedLenByteArray(16)) => {}, + (a, b) => { + return Err(Error::oos(format!( + "Cannot annotate {:?} from {:?} fields", + a, b + ))) + }, + }; + Ok(()) +} diff --git a/crates/polars-parquet/src/parquet/statistics/binary.rs b/crates/polars-parquet/src/parquet/statistics/binary.rs new file mode 100644 index 000000000000..1f599d2fc0e1 --- /dev/null +++ b/crates/polars-parquet/src/parquet/statistics/binary.rs @@ -0,0 +1,51 @@ +use std::sync::Arc; + +use parquet_format_safe::Statistics as ParquetStatistics; + +use super::Statistics; +use crate::parquet::error::Result; +use crate::parquet::schema::types::{PhysicalType, PrimitiveType}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BinaryStatistics { + pub primitive_type: PrimitiveType, + pub null_count: Option, + pub distinct_count: Option, + pub max_value: Option>, + pub min_value: Option>, +} + +impl Statistics for BinaryStatistics { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn physical_type(&self) -> &PhysicalType { + &PhysicalType::ByteArray + } + + fn null_count(&self) -> Option { + self.null_count + } +} + +pub fn read(v: &ParquetStatistics, primitive_type: PrimitiveType) -> Result> { + Ok(Arc::new(BinaryStatistics { + primitive_type, + null_count: v.null_count, + distinct_count: v.distinct_count, + max_value: v.max_value.clone(), + min_value: v.min_value.clone(), + })) +} + +pub fn write(v: &BinaryStatistics) -> ParquetStatistics { + ParquetStatistics { + null_count: v.null_count, + distinct_count: v.distinct_count, + max_value: v.max_value.clone(), + min_value: v.min_value.clone(), + min: None, + max: None, + } +} diff --git a/crates/polars-parquet/src/parquet/statistics/boolean.rs b/crates/polars-parquet/src/parquet/statistics/boolean.rs new file mode 100644 index 000000000000..c167341073f2 --- /dev/null +++ b/crates/polars-parquet/src/parquet/statistics/boolean.rs @@ -0,0 +1,72 @@ +use std::sync::Arc; + +use parquet_format_safe::Statistics as ParquetStatistics; + +use super::Statistics; +use crate::parquet::error::{Error, Result}; +use crate::parquet::schema::types::PhysicalType; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BooleanStatistics { + pub null_count: Option, + pub distinct_count: Option, + pub max_value: Option, + pub min_value: Option, +} + +impl Statistics for BooleanStatistics { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn physical_type(&self) -> &PhysicalType { + &PhysicalType::Boolean + } + + fn null_count(&self) -> Option { + self.null_count + } +} + +pub fn read(v: &ParquetStatistics) -> Result> { + if let Some(ref v) = v.max_value { + if v.len() != std::mem::size_of::() { + return Err(Error::oos( + "The max_value of statistics MUST be plain encoded", + )); + } + }; + if let Some(ref v) = v.min_value { + if v.len() != std::mem::size_of::() { + return 
Err(Error::oos( + "The min_value of statistics MUST be plain encoded", + )); + } + }; + + Ok(Arc::new(BooleanStatistics { + null_count: v.null_count, + distinct_count: v.distinct_count, + max_value: v + .max_value + .as_ref() + .and_then(|x| x.first()) + .map(|x| *x != 0), + min_value: v + .min_value + .as_ref() + .and_then(|x| x.first()) + .map(|x| *x != 0), + })) +} + +pub fn write(v: &BooleanStatistics) -> ParquetStatistics { + ParquetStatistics { + null_count: v.null_count, + distinct_count: v.distinct_count, + max_value: v.max_value.map(|x| vec![x as u8]), + min_value: v.min_value.map(|x| vec![x as u8]), + min: None, + max: None, + } +} diff --git a/crates/polars-parquet/src/parquet/statistics/fixed_len_binary.rs b/crates/polars-parquet/src/parquet/statistics/fixed_len_binary.rs new file mode 100644 index 000000000000..6def092b7edc --- /dev/null +++ b/crates/polars-parquet/src/parquet/statistics/fixed_len_binary.rs @@ -0,0 +1,76 @@ +use std::sync::Arc; + +use parquet_format_safe::Statistics as ParquetStatistics; + +use super::Statistics; +use crate::parquet::error::{Error, Result}; +use crate::parquet::schema::types::{PhysicalType, PrimitiveType}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FixedLenStatistics { + pub primitive_type: PrimitiveType, + pub null_count: Option, + pub distinct_count: Option, + pub max_value: Option>, + pub min_value: Option>, +} + +impl Statistics for FixedLenStatistics { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn physical_type(&self) -> &PhysicalType { + &self.primitive_type.physical_type + } + + fn null_count(&self) -> Option { + self.null_count + } +} + +pub fn read( + v: &ParquetStatistics, + size: usize, + primitive_type: PrimitiveType, +) -> Result> { + if let Some(ref v) = v.max_value { + if v.len() != size { + return Err(Error::oos( + "The max_value of statistics MUST be plain encoded", + )); + } + }; + if let Some(ref v) = v.min_value { + if v.len() != size { + return Err(Error::oos( + "The min_value of statistics MUST be plain encoded", + )); + } + }; + + Ok(Arc::new(FixedLenStatistics { + primitive_type, + null_count: v.null_count, + distinct_count: v.distinct_count, + max_value: v.max_value.clone().map(|mut x| { + x.truncate(size); + x + }), + min_value: v.min_value.clone().map(|mut x| { + x.truncate(size); + x + }), + })) +} + +pub fn write(v: &FixedLenStatistics) -> ParquetStatistics { + ParquetStatistics { + null_count: v.null_count, + distinct_count: v.distinct_count, + max_value: v.max_value.clone(), + min_value: v.min_value.clone(), + min: None, + max: None, + } +} diff --git a/crates/polars-parquet/src/parquet/statistics/mod.rs b/crates/polars-parquet/src/parquet/statistics/mod.rs new file mode 100644 index 000000000000..7451ac753135 --- /dev/null +++ b/crates/polars-parquet/src/parquet/statistics/mod.rs @@ -0,0 +1,134 @@ +mod binary; +mod boolean; +mod fixed_len_binary; +mod primitive; + +use std::any::Any; +use std::sync::Arc; + +pub use binary::BinaryStatistics; +pub use boolean::BooleanStatistics; +pub use fixed_len_binary::FixedLenStatistics; +pub use primitive::PrimitiveStatistics; + +use crate::parquet::error::Result; +use crate::parquet::schema::types::{PhysicalType, PrimitiveType}; +pub use crate::parquet::thrift_format::Statistics as ParquetStatistics; + +/// A trait used to describe specific statistics. Each physical type has its own struct. +/// Match the [`Statistics::physical_type`] to each type and downcast accordingly. 
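+///
+/// Illustrative sketch (not part of the original docs): reading the minimum of an
+/// INT32 column by matching on the physical type and downcasting:
+///
+/// ```ignore
+/// fn min_i32(stats: &dyn Statistics) -> Option<i32> {
+///     match stats.physical_type() {
+///         PhysicalType::Int32 => stats
+///             .as_any()
+///             .downcast_ref::<PrimitiveStatistics<i32>>()
+///             .and_then(|s| s.min_value),
+///         _ => None,
+///     }
+/// }
+/// ```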
+pub trait Statistics: Send + Sync + std::fmt::Debug { + fn as_any(&self) -> &dyn Any; + + fn physical_type(&self) -> &PhysicalType; + + fn null_count(&self) -> Option; +} + +impl PartialEq for &dyn Statistics { + fn eq(&self, other: &Self) -> bool { + self.physical_type() == other.physical_type() && { + match self.physical_type() { + PhysicalType::Boolean => { + self.as_any().downcast_ref::().unwrap() + == other.as_any().downcast_ref::().unwrap() + }, + PhysicalType::Int32 => { + self.as_any() + .downcast_ref::>() + .unwrap() + == other + .as_any() + .downcast_ref::>() + .unwrap() + }, + PhysicalType::Int64 => { + self.as_any() + .downcast_ref::>() + .unwrap() + == other + .as_any() + .downcast_ref::>() + .unwrap() + }, + PhysicalType::Int96 => { + self.as_any() + .downcast_ref::>() + .unwrap() + == other + .as_any() + .downcast_ref::>() + .unwrap() + }, + PhysicalType::Float => { + self.as_any() + .downcast_ref::>() + .unwrap() + == other + .as_any() + .downcast_ref::>() + .unwrap() + }, + PhysicalType::Double => { + self.as_any() + .downcast_ref::>() + .unwrap() + == other + .as_any() + .downcast_ref::>() + .unwrap() + }, + PhysicalType::ByteArray => { + self.as_any().downcast_ref::().unwrap() + == other.as_any().downcast_ref::().unwrap() + }, + PhysicalType::FixedLenByteArray(_) => { + self.as_any().downcast_ref::().unwrap() + == other.as_any().downcast_ref::().unwrap() + }, + } + } + } +} + +/// Deserializes a raw parquet statistics into [`Statistics`]. +/// # Error +/// This function errors if it is not possible to read the statistics to the +/// corresponding `physical_type`. +pub fn deserialize_statistics( + statistics: &ParquetStatistics, + primitive_type: PrimitiveType, +) -> Result> { + match primitive_type.physical_type { + PhysicalType::Boolean => boolean::read(statistics), + PhysicalType::Int32 => primitive::read::(statistics, primitive_type), + PhysicalType::Int64 => primitive::read::(statistics, primitive_type), + PhysicalType::Int96 => primitive::read::<[u32; 3]>(statistics, primitive_type), + PhysicalType::Float => primitive::read::(statistics, primitive_type), + PhysicalType::Double => primitive::read::(statistics, primitive_type), + PhysicalType::ByteArray => binary::read(statistics, primitive_type), + PhysicalType::FixedLenByteArray(size) => { + fixed_len_binary::read(statistics, size, primitive_type) + }, + } +} + +/// Serializes [`Statistics`] into a raw parquet statistics. 
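+///
+/// A minimal, illustrative round trip for boolean statistics (the values are made up
+/// for the example): the function dispatches on [`Statistics::physical_type`] and, for
+/// booleans, plain-encodes min/max as a single byte each.
+///
+/// ```ignore
+/// use crate::parquet::statistics::{serialize_statistics, BooleanStatistics};
+///
+/// let stats = BooleanStatistics {
+///     null_count: Some(0),
+///     distinct_count: None,
+///     min_value: Some(false),
+///     max_value: Some(true),
+/// };
+/// let thrift = serialize_statistics(&stats);
+/// assert_eq!(thrift.min_value, Some(vec![0u8]));
+/// assert_eq!(thrift.max_value, Some(vec![1u8]));
+/// ```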
+pub fn serialize_statistics(statistics: &dyn Statistics) -> ParquetStatistics { + match statistics.physical_type() { + PhysicalType::Boolean => boolean::write(statistics.as_any().downcast_ref().unwrap()), + PhysicalType::Int32 => primitive::write::(statistics.as_any().downcast_ref().unwrap()), + PhysicalType::Int64 => primitive::write::(statistics.as_any().downcast_ref().unwrap()), + PhysicalType::Int96 => { + primitive::write::<[u32; 3]>(statistics.as_any().downcast_ref().unwrap()) + }, + PhysicalType::Float => primitive::write::(statistics.as_any().downcast_ref().unwrap()), + PhysicalType::Double => { + primitive::write::(statistics.as_any().downcast_ref().unwrap()) + }, + PhysicalType::ByteArray => binary::write(statistics.as_any().downcast_ref().unwrap()), + PhysicalType::FixedLenByteArray(_) => { + fixed_len_binary::write(statistics.as_any().downcast_ref().unwrap()) + }, + } +} diff --git a/crates/polars-parquet/src/parquet/statistics/primitive.rs b/crates/polars-parquet/src/parquet/statistics/primitive.rs new file mode 100644 index 000000000000..17a927e9a1ac --- /dev/null +++ b/crates/polars-parquet/src/parquet/statistics/primitive.rs @@ -0,0 +1,70 @@ +use std::sync::Arc; + +use parquet_format_safe::Statistics as ParquetStatistics; + +use super::Statistics; +use crate::parquet::error::{Error, Result}; +use crate::parquet::schema::types::{PhysicalType, PrimitiveType}; +use crate::parquet::types; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PrimitiveStatistics { + pub primitive_type: PrimitiveType, + pub null_count: Option, + pub distinct_count: Option, + pub min_value: Option, + pub max_value: Option, +} + +impl Statistics for PrimitiveStatistics { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn physical_type(&self) -> &PhysicalType { + &T::TYPE + } + + fn null_count(&self) -> Option { + self.null_count + } +} + +pub fn read( + v: &ParquetStatistics, + primitive_type: PrimitiveType, +) -> Result> { + if let Some(ref v) = v.max_value { + if v.len() != std::mem::size_of::() { + return Err(Error::oos( + "The max_value of statistics MUST be plain encoded", + )); + } + }; + if let Some(ref v) = v.min_value { + if v.len() != std::mem::size_of::() { + return Err(Error::oos( + "The min_value of statistics MUST be plain encoded", + )); + } + }; + + Ok(Arc::new(PrimitiveStatistics:: { + primitive_type, + null_count: v.null_count, + distinct_count: v.distinct_count, + max_value: v.max_value.as_ref().map(|x| types::decode(x)), + min_value: v.min_value.as_ref().map(|x| types::decode(x)), + })) +} + +pub fn write(v: &PrimitiveStatistics) -> ParquetStatistics { + ParquetStatistics { + null_count: v.null_count, + distinct_count: v.distinct_count, + max_value: v.max_value.map(|x| x.to_le_bytes().as_ref().to_vec()), + min_value: v.min_value.map(|x| x.to_le_bytes().as_ref().to_vec()), + min: None, + max: None, + } +} diff --git a/crates/polars-parquet/src/parquet/types.rs b/crates/polars-parquet/src/parquet/types.rs new file mode 100644 index 000000000000..59f6c71dc7ab --- /dev/null +++ b/crates/polars-parquet/src/parquet/types.rs @@ -0,0 +1,141 @@ +use std::convert::TryFrom; + +use crate::parquet::schema::types::PhysicalType; + +/// A physical native representation of a Parquet fixed-sized type. 
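+///
+/// As a rough illustration (a sketch, not part of this patch), implementors map a Rust
+/// native to its Parquet physical type and to its little-endian byte representation:
+///
+/// ```ignore
+/// use crate::parquet::schema::types::PhysicalType;
+/// use crate::parquet::types::NativeType;
+///
+/// assert_eq!(<i32 as NativeType>::TYPE, PhysicalType::Int32);
+/// assert_eq!(<i32 as NativeType>::to_le_bytes(&1i32), [1u8, 0, 0, 0]);
+/// ```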
+pub trait NativeType: std::fmt::Debug + Send + Sync + 'static + Copy + Clone { + type Bytes: AsRef<[u8]> + for<'a> TryFrom<&'a [u8], Error = std::array::TryFromSliceError>; + + fn to_le_bytes(&self) -> Self::Bytes; + + fn from_le_bytes(bytes: Self::Bytes) -> Self; + + fn ord(&self, other: &Self) -> std::cmp::Ordering; + + const TYPE: PhysicalType; +} + +macro_rules! native { + ($type:ty, $physical_type:expr) => { + impl NativeType for $type { + type Bytes = [u8; std::mem::size_of::()]; + #[inline] + fn to_le_bytes(&self) -> Self::Bytes { + Self::to_le_bytes(*self) + } + + #[inline] + fn from_le_bytes(bytes: Self::Bytes) -> Self { + Self::from_le_bytes(bytes) + } + + #[inline] + fn ord(&self, other: &Self) -> std::cmp::Ordering { + self.partial_cmp(other).unwrap_or(std::cmp::Ordering::Equal) + } + + const TYPE: PhysicalType = $physical_type; + } + }; +} + +native!(i32, PhysicalType::Int32); +native!(i64, PhysicalType::Int64); +native!(f32, PhysicalType::Float); +native!(f64, PhysicalType::Double); + +impl NativeType for [u32; 3] { + const TYPE: PhysicalType = PhysicalType::Int96; + + type Bytes = [u8; std::mem::size_of::()]; + #[inline] + fn to_le_bytes(&self) -> Self::Bytes { + let mut bytes = [0; 12]; + let first = self[0].to_le_bytes(); + bytes[0] = first[0]; + bytes[1] = first[1]; + bytes[2] = first[2]; + bytes[3] = first[3]; + let second = self[1].to_le_bytes(); + bytes[4] = second[0]; + bytes[5] = second[1]; + bytes[6] = second[2]; + bytes[7] = second[3]; + let third = self[2].to_le_bytes(); + bytes[8] = third[0]; + bytes[9] = third[1]; + bytes[10] = third[2]; + bytes[11] = third[3]; + bytes + } + + #[inline] + fn from_le_bytes(bytes: Self::Bytes) -> Self { + let mut first = [0; 4]; + first[0] = bytes[0]; + first[1] = bytes[1]; + first[2] = bytes[2]; + first[3] = bytes[3]; + let mut second = [0; 4]; + second[0] = bytes[4]; + second[1] = bytes[5]; + second[2] = bytes[6]; + second[3] = bytes[7]; + let mut third = [0; 4]; + third[0] = bytes[8]; + third[1] = bytes[9]; + third[2] = bytes[10]; + third[3] = bytes[11]; + [ + u32::from_le_bytes(first), + u32::from_le_bytes(second), + u32::from_le_bytes(third), + ] + } + + #[inline] + fn ord(&self, other: &Self) -> std::cmp::Ordering { + int96_to_i64_ns(*self).ord(&int96_to_i64_ns(*other)) + } +} + +#[inline] +pub fn int96_to_i64_ns(value: [u32; 3]) -> i64 { + const JULIAN_DAY_OF_EPOCH: i64 = 2_440_588; + const SECONDS_PER_DAY: i64 = 86_400; + const NANOS_PER_SECOND: i64 = 1_000_000_000; + + let day = value[2] as i64; + let nanoseconds = ((value[1] as i64) << 32) + value[0] as i64; + let seconds = (day - JULIAN_DAY_OF_EPOCH) * SECONDS_PER_DAY; + + seconds * NANOS_PER_SECOND + nanoseconds +} + +/// Returns the ordering of two binary values. 
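+///
+/// For example (illustrative only): an empty slice orders before any non-empty slice,
+/// otherwise bytes are compared from the left.
+///
+/// ```ignore
+/// use std::cmp::Ordering;
+///
+/// assert_eq!(ord_binary(&[], &[1]), Ordering::Less);
+/// assert_eq!(ord_binary(&[1, 2], &[1, 3]), Ordering::Less);
+/// assert_eq!(ord_binary(&[3], &[1, 2]), Ordering::Greater);
+/// ```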
+pub fn ord_binary<'a>(a: &'a [u8], b: &'a [u8]) -> std::cmp::Ordering { + use std::cmp::Ordering::*; + match (a.is_empty(), b.is_empty()) { + (true, true) => return Equal, + (true, false) => return Less, + (false, true) => return Greater, + (false, false) => {}, + } + + for (v1, v2) in a.iter().zip(b.iter()) { + match v1.cmp(v2) { + Equal => continue, + other => return other, + } + } + Equal +} + +#[inline] +pub fn decode(chunk: &[u8]) -> T { + let chunk: ::Bytes = match chunk.try_into() { + Ok(v) => v, + Err(_) => panic!(), + }; + T::from_le_bytes(chunk) +} diff --git a/crates/polars-parquet/src/parquet/write/column_chunk.rs b/crates/polars-parquet/src/parquet/write/column_chunk.rs new file mode 100644 index 000000000000..94452d2ac2d2 --- /dev/null +++ b/crates/polars-parquet/src/parquet/write/column_chunk.rs @@ -0,0 +1,208 @@ +use std::io::Write; + +#[cfg(feature = "async")] +use futures::AsyncWrite; +use parquet_format_safe::thrift::protocol::TCompactOutputProtocol; +#[cfg(feature = "async")] +use parquet_format_safe::thrift::protocol::TCompactOutputStreamProtocol; +use parquet_format_safe::{ColumnChunk, ColumnMetaData, Type}; +use polars_utils::aliases::PlHashSet; + +#[cfg(feature = "async")] +use super::page::write_page_async; +use super::page::{write_page, PageWriteSpec}; +use super::statistics::reduce; +use super::DynStreamingIterator; +use crate::parquet::compression::Compression; +use crate::parquet::encoding::Encoding; +use crate::parquet::error::{Error, Result}; +use crate::parquet::metadata::ColumnDescriptor; +use crate::parquet::page::{CompressedPage, PageType}; +use crate::parquet::statistics::serialize_statistics; +use crate::parquet::FallibleStreamingIterator; + +pub fn write_column_chunk( + writer: &mut W, + mut offset: u64, + descriptor: &ColumnDescriptor, + mut compressed_pages: DynStreamingIterator<'_, CompressedPage, E>, +) -> Result<(ColumnChunk, Vec, u64)> +where + W: Write, + Error: From, + E: std::error::Error, +{ + // write every page + + let initial = offset; + + let mut specs = vec![]; + while let Some(compressed_page) = compressed_pages.next()? { + let spec = write_page(writer, offset, compressed_page)?; + offset += spec.bytes_written; + specs.push(spec); + } + let mut bytes_written = offset - initial; + + let column_chunk = build_column_chunk(&specs, descriptor)?; + + // write metadata + let mut protocol = TCompactOutputProtocol::new(writer); + bytes_written += column_chunk + .meta_data + .as_ref() + .unwrap() + .write_to_out_protocol(&mut protocol)? as u64; + + Ok((column_chunk, specs, bytes_written)) +} + +#[cfg(feature = "async")] +#[cfg_attr(docsrs, doc(cfg(feature = "async")))] +pub async fn write_column_chunk_async( + writer: &mut W, + mut offset: u64, + descriptor: &ColumnDescriptor, + mut compressed_pages: DynStreamingIterator<'_, CompressedPage, E>, +) -> Result<(ColumnChunk, Vec, u64)> +where + W: AsyncWrite + Unpin + Send, + Error: From, + E: std::error::Error, +{ + let initial = offset; + // write every page + let mut specs = vec![]; + while let Some(compressed_page) = compressed_pages.next()? { + let spec = write_page_async(writer, offset, compressed_page).await?; + offset += spec.bytes_written; + specs.push(spec); + } + let mut bytes_written = offset - initial; + + let column_chunk = build_column_chunk(&specs, descriptor)?; + + // write metadata + let mut protocol = TCompactOutputStreamProtocol::new(writer); + bytes_written += column_chunk + .meta_data + .as_ref() + .unwrap() + .write_to_out_stream_protocol(&mut protocol) + .await? 
as u64; + + Ok((column_chunk, specs, bytes_written)) +} + +fn build_column_chunk( + specs: &[PageWriteSpec], + descriptor: &ColumnDescriptor, +) -> Result { + // compute stats to build header at the end of the chunk + + let compression = specs + .iter() + .map(|spec| spec.compression) + .collect::>(); + if compression.len() > 1 { + return Err(crate::parquet::error::Error::oos( + "All pages within a column chunk must be compressed with the same codec", + )); + } + let compression = compression + .into_iter() + .next() + .unwrap_or(Compression::Uncompressed); + + // SPEC: the total compressed size is the total compressed size of each page + the header size + let total_compressed_size = specs + .iter() + .map(|x| x.header_size as i64 + x.header.compressed_page_size as i64) + .sum(); + // SPEC: the total compressed size is the total compressed size of each page + the header size + let total_uncompressed_size = specs + .iter() + .map(|x| x.header_size as i64 + x.header.uncompressed_page_size as i64) + .sum(); + let data_page_offset = specs.first().map(|spec| spec.offset).unwrap_or(0) as i64; + let num_values = specs + .iter() + .map(|spec| { + let type_ = spec.header.type_.try_into().unwrap(); + match type_ { + PageType::DataPage => { + spec.header.data_page_header.as_ref().unwrap().num_values as i64 + }, + PageType::DataPageV2 => { + spec.header.data_page_header_v2.as_ref().unwrap().num_values as i64 + }, + _ => 0, // only data pages contribute + } + }) + .sum(); + let mut encodings = specs + .iter() + .flat_map(|spec| { + let type_ = spec.header.type_.try_into().unwrap(); + match type_ { + PageType::DataPage => vec![ + spec.header.data_page_header.as_ref().unwrap().encoding, + Encoding::Rle.into(), + ], + PageType::DataPageV2 => { + vec![ + spec.header.data_page_header_v2.as_ref().unwrap().encoding, + Encoding::Rle.into(), + ] + }, + PageType::DictionaryPage => vec![ + spec.header + .dictionary_page_header + .as_ref() + .unwrap() + .encoding, + ], + } + }) + .collect::>() // unique + .into_iter() // to vec + .collect::>(); + + // Sort the encodings to have deterministic metadata + encodings.sort(); + + let statistics = specs.iter().map(|x| &x.statistics).collect::>(); + let statistics = reduce(&statistics)?; + let statistics = statistics.map(|x| serialize_statistics(x.as_ref())); + + let (type_, _): (Type, Option) = descriptor.descriptor.primitive_type.physical_type.into(); + + let metadata = ColumnMetaData { + type_, + encodings, + path_in_schema: descriptor.path_in_schema.clone(), + codec: compression.into(), + num_values, + total_uncompressed_size, + total_compressed_size, + key_value_metadata: None, + data_page_offset, + index_page_offset: None, + dictionary_page_offset: None, + statistics, + encoding_stats: None, + bloom_filter_offset: None, + }; + + Ok(ColumnChunk { + file_path: None, // same file for now. 
+ file_offset: data_page_offset + total_compressed_size, + meta_data: Some(metadata), + offset_index_offset: None, + offset_index_length: None, + column_index_offset: None, + column_index_length: None, + crypto_metadata: None, + encrypted_column_metadata: None, + }) +} diff --git a/crates/polars-parquet/src/parquet/write/compression.rs b/crates/polars-parquet/src/parquet/write/compression.rs new file mode 100644 index 000000000000..4451811982d4 --- /dev/null +++ b/crates/polars-parquet/src/parquet/write/compression.rs @@ -0,0 +1,160 @@ +use crate::parquet::compression::CompressionOptions; +use crate::parquet::error::{Error, Result}; +use crate::parquet::page::{ + CompressedDataPage, CompressedDictPage, CompressedPage, DataPage, DataPageHeader, DictPage, + Page, +}; +use crate::parquet::{compression, FallibleStreamingIterator}; + +/// Compresses a [`DataPage`] into a [`CompressedDataPage`]. +fn compress_data( + page: DataPage, + mut compressed_buffer: Vec, + compression: CompressionOptions, +) -> Result { + let DataPage { + mut buffer, + header, + descriptor, + selected_rows, + } = page; + let uncompressed_page_size = buffer.len(); + if compression != CompressionOptions::Uncompressed { + match &header { + DataPageHeader::V1(_) => { + compression::compress(compression, &buffer, &mut compressed_buffer)?; + }, + DataPageHeader::V2(header) => { + let levels_byte_length = (header.repetition_levels_byte_length + + header.definition_levels_byte_length) + as usize; + compressed_buffer.extend_from_slice(&buffer[..levels_byte_length]); + compression::compress( + compression, + &buffer[levels_byte_length..], + &mut compressed_buffer, + )?; + }, + }; + } else { + std::mem::swap(&mut buffer, &mut compressed_buffer); + }; + Ok(CompressedDataPage::new_read( + header, + compressed_buffer, + compression.into(), + uncompressed_page_size, + descriptor, + selected_rows, + )) +} + +fn compress_dict( + page: DictPage, + mut compressed_buffer: Vec, + compression: CompressionOptions, +) -> Result { + let DictPage { + mut buffer, + num_values, + is_sorted, + } = page; + let uncompressed_page_size = buffer.len(); + if compression != CompressionOptions::Uncompressed { + compression::compress(compression, &buffer, &mut compressed_buffer)?; + } else { + std::mem::swap(&mut buffer, &mut compressed_buffer); + } + Ok(CompressedDictPage::new( + compressed_buffer, + compression.into(), + uncompressed_page_size, + num_values, + is_sorted, + )) +} + +/// Compresses an [`EncodedPage`] into a [`CompressedPage`] using `compressed_buffer` as the +/// intermediary buffer. +/// +/// `compressed_buffer` is taken by value because it becomes owned by [`CompressedPage`] +/// +/// # Errors +/// Errors if the compressor fails +pub fn compress( + page: Page, + compressed_buffer: Vec, + compression: CompressionOptions, +) -> Result { + match page { + Page::Data(page) => { + compress_data(page, compressed_buffer, compression).map(CompressedPage::Data) + }, + Page::Dict(page) => { + compress_dict(page, compressed_buffer, compression).map(CompressedPage::Dict) + }, + } +} + +/// A [`FallibleStreamingIterator`] that consumes [`Page`] and yields [`CompressedPage`] +/// holding a reusable buffer ([`Vec`]) for compression. 
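+///
+/// A hedged usage sketch (the `pages` iterator is assumed, not defined in this patch):
+/// wrap a fallible iterator of [`Page`]s and drive it through
+/// [`FallibleStreamingIterator`], reusing a single scratch buffer across pages.
+///
+/// ```ignore
+/// use crate::parquet::compression::CompressionOptions;
+/// use crate::parquet::FallibleStreamingIterator;
+///
+/// let mut compressor = Compressor::new(pages, CompressionOptions::Uncompressed, vec![]);
+/// while let Some(compressed_page) = compressor.next()? {
+///     // hand `compressed_page` to a column writer, e.g. `write_page`
+/// }
+/// ```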
+pub struct Compressor>> { + iter: I, + compression: CompressionOptions, + buffer: Vec, + current: Option, +} + +impl>> Compressor { + /// Creates a new [`Compressor`] + pub fn new(iter: I, compression: CompressionOptions, buffer: Vec) -> Self { + Self { + iter, + compression, + buffer, + current: None, + } + } + + /// Creates a new [`Compressor`] (same as `new`) + pub fn new_from_vec(iter: I, compression: CompressionOptions, buffer: Vec) -> Self { + Self::new(iter, compression, buffer) + } + + /// Deconstructs itself into its iterator and scratch buffer. + pub fn into_inner(mut self) -> (I, Vec) { + let mut buffer = if let Some(page) = self.current.as_mut() { + std::mem::take(page.buffer()) + } else { + std::mem::take(&mut self.buffer) + }; + buffer.clear(); + (self.iter, buffer) + } +} + +impl>> FallibleStreamingIterator for Compressor { + type Item = CompressedPage; + type Error = Error; + + fn advance(&mut self) -> std::result::Result<(), Self::Error> { + let mut compressed_buffer = if let Some(page) = self.current.as_mut() { + std::mem::take(page.buffer()) + } else { + std::mem::take(&mut self.buffer) + }; + compressed_buffer.clear(); + + let next = self + .iter + .next() + .map(|x| x.and_then(|page| compress(page, compressed_buffer, self.compression))) + .transpose()?; + self.current = next; + Ok(()) + } + + fn get(&self) -> Option<&Self::Item> { + self.current.as_ref() + } +} diff --git a/crates/polars-parquet/src/parquet/write/dyn_iter.rs b/crates/polars-parquet/src/parquet/write/dyn_iter.rs new file mode 100644 index 000000000000..f47710b56b22 --- /dev/null +++ b/crates/polars-parquet/src/parquet/write/dyn_iter.rs @@ -0,0 +1,65 @@ +use crate::parquet::FallibleStreamingIterator; + +/// [`DynIter`] is an implementation of a single-threaded, dynamically-typed iterator. +/// +/// This implementation is object safe. +pub struct DynIter<'a, V> { + iter: Box + 'a + Send + Sync>, +} + +impl<'a, V> Iterator for DynIter<'a, V> { + type Item = V; + fn next(&mut self) -> Option { + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl<'a, V> DynIter<'a, V> { + /// Returns a new [`DynIter`], boxing the incoming iterator + pub fn new(iter: I) -> Self + where + I: Iterator + 'a + Send + Sync, + { + Self { + iter: Box::new(iter), + } + } +} + +/// Dynamically-typed [`FallibleStreamingIterator`]. 
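+///
+/// Illustrative only: boxing a [`Compressor`] erases its concrete iterator type, so a
+/// row-group writer can accept any page source behind a single trait object.
+///
+/// ```ignore
+/// let pages = DynStreamingIterator::new(compressor);
+/// ```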
+pub struct DynStreamingIterator<'a, V, E> { + iter: Box + 'a + Send + Sync>, +} + +impl<'a, V, E> FallibleStreamingIterator for DynStreamingIterator<'a, V, E> { + type Item = V; + type Error = E; + + fn advance(&mut self) -> Result<(), Self::Error> { + self.iter.advance() + } + + fn get(&self) -> Option<&Self::Item> { + self.iter.get() + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl<'a, V, E> DynStreamingIterator<'a, V, E> { + /// Returns a new [`DynStreamingIterator`], boxing the incoming iterator + pub fn new(iter: I) -> Self + where + I: FallibleStreamingIterator + 'a + Send + Sync, + { + Self { + iter: Box::new(iter), + } + } +} diff --git a/crates/polars-parquet/src/parquet/write/file.rs b/crates/polars-parquet/src/parquet/write/file.rs new file mode 100644 index 000000000000..43fc81dbfdc1 --- /dev/null +++ b/crates/polars-parquet/src/parquet/write/file.rs @@ -0,0 +1,279 @@ +use std::io::Write; + +use parquet_format_safe::thrift::protocol::TCompactOutputProtocol; +use parquet_format_safe::RowGroup; + +use super::indexes::{write_column_index, write_offset_index}; +use super::page::PageWriteSpec; +use super::row_group::write_row_group; +use super::{RowGroupIter, WriteOptions}; +use crate::parquet::error::{Error, Result}; +pub use crate::parquet::metadata::KeyValue; +use crate::parquet::metadata::{SchemaDescriptor, ThriftFileMetaData}; +use crate::parquet::write::State; +use crate::parquet::{FOOTER_SIZE, PARQUET_MAGIC}; + +pub(super) fn start_file(writer: &mut W) -> Result { + writer.write_all(&PARQUET_MAGIC)?; + Ok(PARQUET_MAGIC.len() as u64) +} + +pub(super) fn end_file(mut writer: &mut W, metadata: &ThriftFileMetaData) -> Result { + // Write metadata + let mut protocol = TCompactOutputProtocol::new(&mut writer); + let metadata_len = metadata.write_to_out_protocol(&mut protocol)? as i32; + + // Write footer + let metadata_bytes = metadata_len.to_le_bytes(); + let mut footer_buffer = [0u8; FOOTER_SIZE as usize]; + (0..4).for_each(|i| { + footer_buffer[i] = metadata_bytes[i]; + }); + + (&mut footer_buffer[4..]).write_all(&PARQUET_MAGIC)?; + writer.write_all(&footer_buffer)?; + writer.flush()?; + Ok(metadata_len as u64 + FOOTER_SIZE) +} + +/// An interface to write a parquet file. +/// Use `start` to write the header, `write` to write a row group, +/// and `end` to write the footer. +pub struct FileWriter { + writer: W, + schema: SchemaDescriptor, + options: WriteOptions, + created_by: Option, + + offset: u64, + row_groups: Vec, + page_specs: Vec>>, + /// Used to store the current state for writing the file + state: State, + // when the file is written, metadata becomes available + metadata: Option, +} + +/// Writes a parquet file containing only the header and footer +/// +/// This is used to write the metadata as a separate Parquet file, usually when data +/// is partitioned across multiple files. +/// +/// Note: Recall that when combining row groups from [`ThriftFileMetaData`], the `file_path` on each +/// of their column chunks must be updated with their path relative to where they are written to. 
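+///
+/// A minimal sketch (the file name and the `metadata` value are assumptions for the
+/// example; `metadata` would be a [`ThriftFileMetaData`] assembled from already-written
+/// data files):
+///
+/// ```ignore
+/// use std::fs::File;
+///
+/// let mut sidecar = File::create("_metadata")?;
+/// let written = write_metadata_sidecar(&mut sidecar, &metadata)?;
+/// ```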
+pub fn write_metadata_sidecar( + writer: &mut W, + metadata: &ThriftFileMetaData, +) -> Result { + let mut len = start_file(writer)?; + len += end_file(writer, metadata)?; + Ok(len) +} + +// Accessors +impl FileWriter { + /// The options assigned to the file + pub fn options(&self) -> &WriteOptions { + &self.options + } + + /// The [`SchemaDescriptor`] assigned to this file + pub fn schema(&self) -> &SchemaDescriptor { + &self.schema + } + + /// Returns the [`ThriftFileMetaData`]. This is Some iff the [`Self::end`] has been called. + /// + /// This is used to write the metadata as a separate Parquet file, usually when data + /// is partitioned across multiple files + pub fn metadata(&self) -> Option<&ThriftFileMetaData> { + self.metadata.as_ref() + } +} + +impl FileWriter { + /// Returns a new [`FileWriter`]. + pub fn new( + writer: W, + schema: SchemaDescriptor, + options: WriteOptions, + created_by: Option, + ) -> Self { + Self { + writer, + schema, + options, + created_by, + offset: 0, + row_groups: vec![], + page_specs: vec![], + state: State::Initialised, + metadata: None, + } + } + + /// Writes the header of the file. + /// + /// This is automatically called by [`Self::write`] if not called following [`Self::new`]. + /// + /// # Errors + /// Returns an error if data has been written to the file. + fn start(&mut self) -> Result<()> { + if self.offset == 0 { + self.offset = start_file(&mut self.writer)?; + self.state = State::Started; + Ok(()) + } else { + Err(Error::InvalidParameter( + "Start cannot be called twice".to_string(), + )) + } + } + + /// Writes a row group to the file. + /// + /// This call is IO-bounded + pub fn write(&mut self, row_group: RowGroupIter<'_, E>) -> Result<()> + where + Error: From, + E: std::error::Error, + { + if self.offset == 0 { + self.start()?; + } + let ordinal = self.row_groups.len(); + let (group, specs, size) = write_row_group( + &mut self.writer, + self.offset, + self.schema.columns(), + row_group, + ordinal, + )?; + self.offset += size; + self.row_groups.push(group); + self.page_specs.push(specs); + Ok(()) + } + + /// Writes the footer of the parquet file. Returns the total size of the file and the + /// underlying writer. 
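+    ///
+    /// Sketch of the full lifecycle (the `file`, `schema`, `options` and `row_group`
+    /// values are assumed for the example):
+    ///
+    /// ```ignore
+    /// let mut writer = FileWriter::new(file, schema, options, Some("created-by".to_string()));
+    /// writer.write(row_group)?;
+    /// let file_size = writer.end(None)?;
+    /// ```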
+ pub fn end(&mut self, key_value_metadata: Option>) -> Result { + if self.offset == 0 { + self.start()?; + } + + if self.state != State::Started { + return Err(Error::InvalidParameter( + "End cannot be called twice".to_string(), + )); + } + // compute file stats + let num_rows = self.row_groups.iter().map(|group| group.num_rows).sum(); + + if self.options.write_statistics { + // write column indexes (require page statistics) + self.row_groups + .iter_mut() + .zip(self.page_specs.iter()) + .try_for_each(|(group, pages)| { + group.columns.iter_mut().zip(pages.iter()).try_for_each( + |(column, pages)| { + let offset = self.offset; + column.column_index_offset = Some(offset as i64); + self.offset += write_column_index(&mut self.writer, pages)?; + let length = self.offset - offset; + column.column_index_length = Some(length as i32); + Result::Ok(()) + }, + )?; + Result::Ok(()) + })?; + }; + + // write offset index + self.row_groups + .iter_mut() + .zip(self.page_specs.iter()) + .try_for_each(|(group, pages)| { + group + .columns + .iter_mut() + .zip(pages.iter()) + .try_for_each(|(column, pages)| { + let offset = self.offset; + column.offset_index_offset = Some(offset as i64); + self.offset += write_offset_index(&mut self.writer, pages)?; + column.offset_index_length = Some((self.offset - offset) as i32); + Result::Ok(()) + })?; + Result::Ok(()) + })?; + + let metadata = ThriftFileMetaData::new( + self.options.version.into(), + self.schema.clone().into_thrift(), + num_rows, + self.row_groups.clone(), + key_value_metadata, + self.created_by.clone(), + None, + None, + None, + ); + + let len = end_file(&mut self.writer, &metadata)?; + self.state = State::Finished; + self.metadata = Some(metadata); + Ok(self.offset + len) + } + + /// Returns the underlying writer. 
+ pub fn into_inner(self) -> W { + self.writer + } + + /// Returns the underlying writer and [`ThriftFileMetaData`] + /// # Panics + /// This function panics if [`Self::end`] has not yet been called + pub fn into_inner_and_metadata(self) -> (W, ThriftFileMetaData) { + (self.writer, self.metadata.expect("File to have ended")) + } +} + +#[cfg(test)] +mod tests { + use std::fs::File; + use std::io::Cursor; + + use super::*; + use crate::parquet::error::Result; + use crate::parquet::read::read_metadata; + use crate::parquet::tests::get_path; + + #[test] + fn empty_file() -> Result<()> { + let mut testdata = get_path(); + testdata.push("alltypes_plain.parquet"); + let mut file = File::open(testdata).unwrap(); + + let mut metadata = read_metadata(&mut file)?; + + // take away all groups and rows + metadata.row_groups = vec![]; + metadata.num_rows = 0; + + let mut writer = Cursor::new(vec![]); + + // write the file + start_file(&mut writer)?; + end_file(&mut writer, &metadata.into_thrift())?; + + let a = writer.into_inner(); + + // read it again: + let result = read_metadata(&mut Cursor::new(a)); + assert!(result.is_ok()); + + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/write/indexes/mod.rs b/crates/polars-parquet/src/parquet/write/indexes/mod.rs new file mode 100644 index 000000000000..9f413a15d26a --- /dev/null +++ b/crates/polars-parquet/src/parquet/write/indexes/mod.rs @@ -0,0 +1,4 @@ +mod serialize; +mod write; + +pub use write::*; diff --git a/crates/polars-parquet/src/parquet/write/indexes/serialize.rs b/crates/polars-parquet/src/parquet/write/indexes/serialize.rs new file mode 100644 index 000000000000..002ff2059371 --- /dev/null +++ b/crates/polars-parquet/src/parquet/write/indexes/serialize.rs @@ -0,0 +1,78 @@ +use parquet_format_safe::{BoundaryOrder, ColumnIndex, OffsetIndex, PageLocation}; + +use crate::parquet::error::{Error, Result}; +pub use crate::parquet::metadata::KeyValue; +use crate::parquet::statistics::serialize_statistics; +use crate::parquet::write::page::{is_data_page, PageWriteSpec}; + +pub fn serialize_column_index(pages: &[PageWriteSpec]) -> Result { + let mut null_pages = Vec::with_capacity(pages.len()); + let mut min_values = Vec::with_capacity(pages.len()); + let mut max_values = Vec::with_capacity(pages.len()); + let mut null_counts = Vec::with_capacity(pages.len()); + + pages + .iter() + .filter(|x| is_data_page(x)) + .try_for_each(|spec| { + if let Some(stats) = &spec.statistics { + let stats = serialize_statistics(stats.as_ref()); + + let null_count = stats + .null_count + .ok_or_else(|| Error::oos("null count of a page is required"))?; + null_counts.push(null_count); + + if let Some(min_value) = stats.min_value { + min_values.push(min_value); + max_values.push( + stats + .max_value + .ok_or_else(|| Error::oos("max value of a page is required"))?, + ); + null_pages.push(false) + } else { + min_values.push(vec![0]); + max_values.push(vec![0]); + null_pages.push(true) + } + + Result::Ok(()) + } else { + Err(Error::oos( + "options were set to write statistics but some pages miss them", + )) + } + })?; + Ok(ColumnIndex { + null_pages, + min_values, + max_values, + boundary_order: BoundaryOrder::UNORDERED, + null_counts: Some(null_counts), + }) +} + +pub fn serialize_offset_index(pages: &[PageWriteSpec]) -> Result { + let mut first_row_index = 0; + let page_locations = pages + .iter() + .filter(|x| is_data_page(x)) + .map(|spec| { + let location = PageLocation { + offset: spec.offset.try_into()?, + compressed_page_size: 
spec.bytes_written.try_into()?, + first_row_index, + }; + let num_rows = spec.num_rows.ok_or_else(|| { + Error::oos( + "options were set to write statistics but some data pages miss number of rows", + ) + })?; + first_row_index += num_rows as i64; + Ok(location) + }) + .collect::>>()?; + + Ok(OffsetIndex { page_locations }) +} diff --git a/crates/polars-parquet/src/parquet/write/indexes/write.rs b/crates/polars-parquet/src/parquet/write/indexes/write.rs new file mode 100644 index 000000000000..5aab227b7bac --- /dev/null +++ b/crates/polars-parquet/src/parquet/write/indexes/write.rs @@ -0,0 +1,46 @@ +use std::io::Write; + +#[cfg(feature = "async")] +use futures::AsyncWrite; +use parquet_format_safe::thrift::protocol::TCompactOutputProtocol; +#[cfg(feature = "async")] +use parquet_format_safe::thrift::protocol::TCompactOutputStreamProtocol; + +use super::serialize::{serialize_column_index, serialize_offset_index}; +use crate::parquet::error::Result; +pub use crate::parquet::metadata::KeyValue; +use crate::parquet::write::page::PageWriteSpec; + +pub fn write_column_index(writer: &mut W, pages: &[PageWriteSpec]) -> Result { + let index = serialize_column_index(pages)?; + let mut protocol = TCompactOutputProtocol::new(writer); + Ok(index.write_to_out_protocol(&mut protocol)? as u64) +} + +#[cfg(feature = "async")] +#[cfg_attr(docsrs, doc(cfg(feature = "async")))] +pub async fn write_column_index_async( + writer: &mut W, + pages: &[PageWriteSpec], +) -> Result { + let index = serialize_column_index(pages)?; + let mut protocol = TCompactOutputStreamProtocol::new(writer); + Ok(index.write_to_out_stream_protocol(&mut protocol).await? as u64) +} + +pub fn write_offset_index(writer: &mut W, pages: &[PageWriteSpec]) -> Result { + let index = serialize_offset_index(pages)?; + let mut protocol = TCompactOutputProtocol::new(&mut *writer); + Ok(index.write_to_out_protocol(&mut protocol)? as u64) +} + +#[cfg(feature = "async")] +#[cfg_attr(docsrs, doc(cfg(feature = "async")))] +pub async fn write_offset_index_async( + writer: &mut W, + pages: &[PageWriteSpec], +) -> Result { + let index = serialize_offset_index(pages)?; + let mut protocol = TCompactOutputStreamProtocol::new(&mut *writer); + Ok(index.write_to_out_stream_protocol(&mut protocol).await? 
as u64) +} diff --git a/crates/polars-parquet/src/parquet/write/mod.rs b/crates/polars-parquet/src/parquet/write/mod.rs new file mode 100644 index 000000000000..251d37472db4 --- /dev/null +++ b/crates/polars-parquet/src/parquet/write/mod.rs @@ -0,0 +1,57 @@ +mod column_chunk; +mod compression; +mod file; +mod indexes; +pub(crate) mod page; +mod row_group; +mod statistics; + +#[cfg(feature = "async")] +mod stream; +#[cfg(feature = "async")] +#[cfg_attr(docsrs, doc(cfg(feature = "async")))] +pub use stream::FileStreamer; + +mod dyn_iter; +pub use compression::{compress, Compressor}; +pub use dyn_iter::{DynIter, DynStreamingIterator}; +pub use file::{write_metadata_sidecar, FileWriter}; +pub use row_group::ColumnOffsetsMetadata; + +use crate::parquet::page::CompressedPage; + +pub type RowGroupIter<'a, E> = + DynIter<'a, std::result::Result, E>>; + +/// Write options of different interfaces on this crate +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct WriteOptions { + /// Whether to write statistics, including indexes + pub write_statistics: bool, + /// Which Parquet version to use + pub version: Version, +} + +/// The parquet version to use +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum Version { + V1, + V2, +} + +/// Used to recall the state of the parquet writer - whether sync or async. +#[derive(PartialEq)] +enum State { + Initialised, + Started, + Finished, +} + +impl From for i32 { + fn from(version: Version) -> Self { + match version { + Version::V1 => 1, + Version::V2 => 2, + } + } +} diff --git a/crates/polars-parquet/src/parquet/write/page.rs b/crates/polars-parquet/src/parquet/write/page.rs new file mode 100644 index 000000000000..1f024b629f07 --- /dev/null +++ b/crates/polars-parquet/src/parquet/write/page.rs @@ -0,0 +1,243 @@ +use std::convert::TryInto; +use std::io::Write; +use std::sync::Arc; + +#[cfg(feature = "async")] +use futures::{AsyncWrite, AsyncWriteExt}; +use parquet_format_safe::thrift::protocol::TCompactOutputProtocol; +#[cfg(feature = "async")] +use parquet_format_safe::thrift::protocol::TCompactOutputStreamProtocol; +use parquet_format_safe::{DictionaryPageHeader, Encoding, PageType}; + +use crate::parquet::compression::Compression; +use crate::parquet::error::{Error, Result}; +use crate::parquet::page::{ + CompressedDataPage, CompressedDictPage, CompressedPage, DataPageHeader, ParquetPageHeader, +}; +use crate::parquet::statistics::Statistics; + +pub(crate) fn is_data_page(page: &PageWriteSpec) -> bool { + page.header.type_ == PageType::DATA_PAGE || page.header.type_ == PageType::DATA_PAGE_V2 +} + +fn maybe_bytes(uncompressed: usize, compressed: usize) -> Result<(i32, i32)> { + let uncompressed_page_size: i32 = uncompressed.try_into().map_err(|_| { + Error::oos(format!( + "A page can only contain i32::MAX uncompressed bytes. This one contains {}", + uncompressed + )) + })?; + + let compressed_page_size: i32 = compressed.try_into().map_err(|_| { + Error::oos(format!( + "A page can only contain i32::MAX compressed bytes. This one contains {}", + compressed + )) + })?; + + Ok((uncompressed_page_size, compressed_page_size)) +} + +/// Contains page write metrics. 
+pub struct PageWriteSpec { + pub header: ParquetPageHeader, + pub num_values: usize, + pub num_rows: Option, + pub header_size: u64, + pub offset: u64, + pub bytes_written: u64, + pub compression: Compression, + pub statistics: Option>, +} + +pub fn write_page( + writer: &mut W, + offset: u64, + compressed_page: &CompressedPage, +) -> Result { + let num_values = compressed_page.num_values(); + let selected_rows = compressed_page.selected_rows(); + + let header = match &compressed_page { + CompressedPage::Data(compressed_page) => assemble_data_page_header(compressed_page), + CompressedPage::Dict(compressed_page) => assemble_dict_page_header(compressed_page), + }?; + + let header_size = write_page_header(writer, &header)?; + let mut bytes_written = header_size; + + bytes_written += match &compressed_page { + CompressedPage::Data(compressed_page) => { + writer.write_all(&compressed_page.buffer)?; + compressed_page.buffer.len() as u64 + }, + CompressedPage::Dict(compressed_page) => { + writer.write_all(&compressed_page.buffer)?; + compressed_page.buffer.len() as u64 + }, + }; + + let statistics = match &compressed_page { + CompressedPage::Data(compressed_page) => compressed_page.statistics().transpose()?, + CompressedPage::Dict(_) => None, + }; + + Ok(PageWriteSpec { + header, + header_size, + offset, + bytes_written, + compression: compressed_page.compression(), + statistics, + num_rows: selected_rows.map(|x| x.last().unwrap().length), + num_values, + }) +} + +#[cfg(feature = "async")] +#[cfg_attr(docsrs, doc(cfg(feature = "async")))] +pub async fn write_page_async( + writer: &mut W, + offset: u64, + compressed_page: &CompressedPage, +) -> Result { + let num_values = compressed_page.num_values(); + let selected_rows = compressed_page.selected_rows(); + + let header = match &compressed_page { + CompressedPage::Data(compressed_page) => assemble_data_page_header(compressed_page), + CompressedPage::Dict(compressed_page) => assemble_dict_page_header(compressed_page), + }?; + + let header_size = write_page_header_async(writer, &header).await?; + let mut bytes_written = header_size as u64; + + bytes_written += match &compressed_page { + CompressedPage::Data(compressed_page) => { + writer.write_all(&compressed_page.buffer).await?; + compressed_page.buffer.len() as u64 + }, + CompressedPage::Dict(compressed_page) => { + writer.write_all(&compressed_page.buffer).await?; + compressed_page.buffer.len() as u64 + }, + }; + + let statistics = match &compressed_page { + CompressedPage::Data(compressed_page) => compressed_page.statistics().transpose()?, + CompressedPage::Dict(_) => None, + }; + + Ok(PageWriteSpec { + header, + header_size, + offset, + bytes_written, + compression: compressed_page.compression(), + statistics, + num_rows: selected_rows.map(|x| x.last().unwrap().length), + num_values, + }) +} + +fn assemble_data_page_header(page: &CompressedDataPage) -> Result { + let (uncompressed_page_size, compressed_page_size) = + maybe_bytes(page.uncompressed_size(), page.compressed_size())?; + + let mut page_header = ParquetPageHeader { + type_: match page.header() { + DataPageHeader::V1(_) => PageType::DATA_PAGE, + DataPageHeader::V2(_) => PageType::DATA_PAGE_V2, + }, + uncompressed_page_size, + compressed_page_size, + crc: None, + data_page_header: None, + index_page_header: None, + dictionary_page_header: None, + data_page_header_v2: None, + }; + + match page.header() { + DataPageHeader::V1(header) => { + page_header.data_page_header = Some(header.clone()); + }, + DataPageHeader::V2(header) => { + 
page_header.data_page_header_v2 = Some(header.clone()); + }, + } + Ok(page_header) +} + +fn assemble_dict_page_header(page: &CompressedDictPage) -> Result { + let (uncompressed_page_size, compressed_page_size) = + maybe_bytes(page.uncompressed_page_size, page.buffer.len())?; + + let num_values: i32 = page.num_values.try_into().map_err(|_| { + Error::oos(format!( + "A dictionary page can only contain i32::MAX items. This one contains {}", + page.num_values + )) + })?; + + Ok(ParquetPageHeader { + type_: PageType::DICTIONARY_PAGE, + uncompressed_page_size, + compressed_page_size, + crc: None, + data_page_header: None, + index_page_header: None, + dictionary_page_header: Some(DictionaryPageHeader { + num_values, + encoding: Encoding::PLAIN, + is_sorted: None, + }), + data_page_header_v2: None, + }) +} + +/// writes the page header into `writer`, returning the number of bytes used in the process. +fn write_page_header(mut writer: &mut W, header: &ParquetPageHeader) -> Result { + let mut protocol = TCompactOutputProtocol::new(&mut writer); + Ok(header.write_to_out_protocol(&mut protocol)? as u64) +} + +#[cfg(feature = "async")] +#[cfg_attr(docsrs, doc(cfg(feature = "async")))] +/// writes the page header into `writer`, returning the number of bytes used in the process. +async fn write_page_header_async( + mut writer: &mut W, + header: &ParquetPageHeader, +) -> Result { + let mut protocol = TCompactOutputStreamProtocol::new(&mut writer); + Ok(header.write_to_out_stream_protocol(&mut protocol).await? as u64) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn dict_too_large() { + let page = CompressedDictPage::new( + vec![], + Compression::Uncompressed, + i32::MAX as usize + 1, + 100, + false, + ); + assert!(assemble_dict_page_header(&page).is_err()); + } + + #[test] + fn dict_too_many_values() { + let page = CompressedDictPage::new( + vec![], + Compression::Uncompressed, + 0, + i32::MAX as usize + 1, + false, + ); + assert!(assemble_dict_page_header(&page).is_err()); + } +} diff --git a/crates/polars-parquet/src/parquet/write/row_group.rs b/crates/polars-parquet/src/parquet/write/row_group.rs new file mode 100644 index 000000000000..943079430aaf --- /dev/null +++ b/crates/polars-parquet/src/parquet/write/row_group.rs @@ -0,0 +1,200 @@ +use std::io::Write; + +#[cfg(feature = "async")] +use futures::AsyncWrite; +use parquet_format_safe::{ColumnChunk, RowGroup}; + +use super::column_chunk::write_column_chunk; +#[cfg(feature = "async")] +use super::column_chunk::write_column_chunk_async; +use super::page::{is_data_page, PageWriteSpec}; +use super::{DynIter, DynStreamingIterator}; +use crate::parquet::error::{Error, Result}; +use crate::parquet::metadata::{ColumnChunkMetaData, ColumnDescriptor}; +use crate::parquet::page::CompressedPage; + +pub struct ColumnOffsetsMetadata { + pub dictionary_page_offset: Option, + pub data_page_offset: Option, +} + +impl ColumnOffsetsMetadata { + pub fn from_column_chunk(column_chunk: &ColumnChunk) -> ColumnOffsetsMetadata { + ColumnOffsetsMetadata { + dictionary_page_offset: column_chunk + .meta_data + .as_ref() + .map(|meta| meta.dictionary_page_offset) + .unwrap_or(None), + data_page_offset: column_chunk + .meta_data + .as_ref() + .map(|meta| meta.data_page_offset), + } + } + + pub fn from_column_chunk_metadata( + column_chunk_metadata: &ColumnChunkMetaData, + ) -> ColumnOffsetsMetadata { + ColumnOffsetsMetadata { + dictionary_page_offset: column_chunk_metadata.dictionary_page_offset(), + data_page_offset: 
Some(column_chunk_metadata.data_page_offset()), + } + } + + pub fn calc_row_group_file_offset(&self) -> Option { + self.dictionary_page_offset + .filter(|x| *x > 0_i64) + .or(self.data_page_offset) + } +} + +fn compute_num_rows(columns: &[(ColumnChunk, Vec)]) -> Result { + columns + .get(0) + .map(|(_, specs)| { + let mut num_rows = 0; + specs + .iter() + .filter(|x| is_data_page(x)) + .try_for_each(|spec| { + num_rows += spec.num_rows.ok_or_else(|| { + Error::oos("All data pages must declare the number of rows on it") + })? as i64; + Result::Ok(()) + })?; + Result::Ok(num_rows) + }) + .unwrap_or(Ok(0)) +} + +pub fn write_row_group< + 'a, + W, + E, // external error any of the iterators may emit +>( + writer: &mut W, + mut offset: u64, + descriptors: &[ColumnDescriptor], + columns: DynIter<'a, std::result::Result, E>>, + ordinal: usize, +) -> Result<(RowGroup, Vec>, u64)> +where + W: Write, + Error: From, + E: std::error::Error, +{ + let column_iter = descriptors.iter().zip(columns); + + let initial = offset; + let columns = column_iter + .map(|(descriptor, page_iter)| { + let (column, page_specs, size) = + write_column_chunk(writer, offset, descriptor, page_iter?)?; + offset += size; + Ok((column, page_specs)) + }) + .collect::>>()?; + let bytes_written = offset - initial; + + let num_rows = compute_num_rows(&columns)?; + + // compute row group stats + let file_offset = columns + .get(0) + .map(|(column_chunk, _)| { + ColumnOffsetsMetadata::from_column_chunk(column_chunk).calc_row_group_file_offset() + }) + .unwrap_or(None); + + let total_byte_size = columns + .iter() + .map(|(c, _)| c.meta_data.as_ref().unwrap().total_uncompressed_size) + .sum(); + let total_compressed_size = columns + .iter() + .map(|(c, _)| c.meta_data.as_ref().unwrap().total_compressed_size) + .sum(); + + let (columns, specs) = columns.into_iter().unzip(); + + Ok(( + RowGroup { + columns, + total_byte_size, + num_rows, + sorting_columns: None, + file_offset, + total_compressed_size: Some(total_compressed_size), + ordinal: ordinal.try_into().ok(), + }, + specs, + bytes_written, + )) +} + +#[cfg(feature = "async")] +#[cfg_attr(docsrs, doc(cfg(feature = "async")))] +pub async fn write_row_group_async< + 'a, + W, + E, // external error any of the iterators may emit +>( + writer: &mut W, + mut offset: u64, + descriptors: &[ColumnDescriptor], + columns: DynIter<'a, std::result::Result, E>>, + ordinal: usize, +) -> Result<(RowGroup, Vec>, u64)> +where + W: AsyncWrite + Unpin + Send, + Error: From, + E: std::error::Error, +{ + let column_iter = descriptors.iter().zip(columns); + + let initial = offset; + let mut columns = vec![]; + for (descriptor, page_iter) in column_iter { + let (column, page_specs, size) = + write_column_chunk_async(writer, offset, descriptor, page_iter?).await?; + offset += size; + columns.push((column, page_specs)); + } + let bytes_written = offset - initial; + + let num_rows = compute_num_rows(&columns)?; + + // compute row group stats + let file_offset = columns + .get(0) + .map(|(column_chunk, _)| { + ColumnOffsetsMetadata::from_column_chunk(column_chunk).calc_row_group_file_offset() + }) + .unwrap_or(None); + + let total_byte_size = columns + .iter() + .map(|(c, _)| c.meta_data.as_ref().unwrap().total_uncompressed_size) + .sum(); + let total_compressed_size = columns + .iter() + .map(|(c, _)| c.meta_data.as_ref().unwrap().total_compressed_size) + .sum(); + + let (columns, specs) = columns.into_iter().unzip(); + + Ok(( + RowGroup { + columns, + total_byte_size, + num_rows: num_rows as i64, + 
sorting_columns: None, + file_offset, + total_compressed_size: Some(total_compressed_size), + ordinal: ordinal.try_into().ok(), + }, + specs, + bytes_written, + )) +} diff --git a/crates/polars-parquet/src/parquet/write/statistics.rs b/crates/polars-parquet/src/parquet/write/statistics.rs new file mode 100644 index 000000000000..f0aa9cc2011a --- /dev/null +++ b/crates/polars-parquet/src/parquet/write/statistics.rs @@ -0,0 +1,323 @@ +use std::sync::Arc; + +use crate::parquet::error::{Error, Result}; +use crate::parquet::schema::types::PhysicalType; +use crate::parquet::statistics::*; +use crate::parquet::types::NativeType; + +#[inline] +fn reduce_single T>(lhs: Option, rhs: Option, op: F) -> Option { + match (lhs, rhs) { + (None, None) => None, + (Some(x), None) => Some(x), + (None, Some(x)) => Some(x), + (Some(x), Some(y)) => Some(op(x, y)), + } +} + +#[inline] +fn reduce_vec8(lhs: Option>, rhs: &Option>, max: bool) -> Option> { + match (lhs, rhs) { + (None, None) => None, + (Some(x), None) => Some(x), + (None, Some(x)) => Some(x.clone()), + (Some(x), Some(y)) => Some(ord_binary(x, y.clone(), max)), + } +} + +pub fn reduce(stats: &[&Option>]) -> Result>> { + if stats.is_empty() { + return Ok(None); + } + let stats = stats + .iter() + .filter_map(|x| x.as_ref()) + .map(|x| x.as_ref()) + .collect::>(); + if stats.is_empty() { + return Ok(None); + }; + + let same_type = stats + .iter() + .skip(1) + .all(|x| x.physical_type() == stats[0].physical_type()); + if !same_type { + return Err(Error::oos("The statistics do not have the same data_type")); + }; + Ok(match stats[0].physical_type() { + PhysicalType::Boolean => { + let stats = stats.iter().map(|x| x.as_any().downcast_ref().unwrap()); + Some(Arc::new(reduce_boolean(stats))) + }, + PhysicalType::Int32 => { + let stats = stats.iter().map(|x| x.as_any().downcast_ref().unwrap()); + Some(Arc::new(reduce_primitive::(stats))) + }, + PhysicalType::Int64 => { + let stats = stats.iter().map(|x| x.as_any().downcast_ref().unwrap()); + Some(Arc::new(reduce_primitive::(stats))) + }, + PhysicalType::Float => { + let stats = stats.iter().map(|x| x.as_any().downcast_ref().unwrap()); + Some(Arc::new(reduce_primitive::(stats))) + }, + PhysicalType::Double => { + let stats = stats.iter().map(|x| x.as_any().downcast_ref().unwrap()); + Some(Arc::new(reduce_primitive::(stats))) + }, + PhysicalType::ByteArray => { + let stats = stats.iter().map(|x| x.as_any().downcast_ref().unwrap()); + Some(Arc::new(reduce_binary(stats))) + }, + PhysicalType::FixedLenByteArray(_) => { + let stats = stats.iter().map(|x| x.as_any().downcast_ref().unwrap()); + Some(Arc::new(reduce_fix_len_binary(stats))) + }, + _ => todo!(), + }) +} + +fn reduce_binary<'a, I: Iterator>(mut stats: I) -> BinaryStatistics { + let initial = stats.next().unwrap().clone(); + stats.fold(initial, |mut acc, new| { + acc.min_value = reduce_vec8(acc.min_value, &new.min_value, false); + acc.max_value = reduce_vec8(acc.max_value, &new.max_value, true); + acc.null_count = reduce_single(acc.null_count, new.null_count, |x, y| x + y); + acc.distinct_count = None; + acc + }) +} + +fn reduce_fix_len_binary<'a, I: Iterator>( + mut stats: I, +) -> FixedLenStatistics { + let initial = stats.next().unwrap().clone(); + stats.fold(initial, |mut acc, new| { + acc.min_value = reduce_vec8(acc.min_value, &new.min_value, false); + acc.max_value = reduce_vec8(acc.max_value, &new.max_value, true); + acc.null_count = reduce_single(acc.null_count, new.null_count, |x, y| x + y); + acc.distinct_count = None; + acc + }) +} + +fn 
ord_binary(a: Vec, b: Vec, max: bool) -> Vec { + for (v1, v2) in a.iter().zip(b.iter()) { + match v1.cmp(v2) { + std::cmp::Ordering::Greater => { + if max { + return a; + } else { + return b; + } + }, + std::cmp::Ordering::Less => { + if max { + return b; + } else { + return a; + } + }, + _ => {}, + } + } + a +} + +fn reduce_boolean<'a, I: Iterator>( + mut stats: I, +) -> BooleanStatistics { + let initial = stats.next().unwrap().clone(); + stats.fold(initial, |mut acc, new| { + acc.min_value = reduce_single( + acc.min_value, + new.min_value, + |x, y| if x & !(y) { y } else { x }, + ); + acc.max_value = reduce_single( + acc.max_value, + new.max_value, + |x, y| if x & !(y) { x } else { y }, + ); + acc.null_count = reduce_single(acc.null_count, new.null_count, |x, y| x + y); + acc.distinct_count = None; + acc + }) +} + +fn reduce_primitive< + 'a, + T: NativeType + std::cmp::PartialOrd, + I: Iterator>, +>( + mut stats: I, +) -> PrimitiveStatistics { + let initial = stats.next().unwrap().clone(); + stats.fold(initial, |mut acc, new| { + acc.min_value = reduce_single( + acc.min_value, + new.min_value, + |x, y| if x > y { y } else { x }, + ); + acc.max_value = reduce_single( + acc.max_value, + new.max_value, + |x, y| if x > y { x } else { y }, + ); + acc.null_count = reduce_single(acc.null_count, new.null_count, |x, y| x + y); + acc.distinct_count = None; + acc + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parquet::schema::types::PrimitiveType; + + #[test] + fn binary() -> Result<()> { + let iter = vec![ + BinaryStatistics { + primitive_type: PrimitiveType::from_physical( + "bla".to_string(), + PhysicalType::ByteArray, + ), + null_count: Some(0), + distinct_count: None, + min_value: Some(vec![1, 2]), + max_value: Some(vec![3, 4]), + }, + BinaryStatistics { + primitive_type: PrimitiveType::from_physical( + "bla".to_string(), + PhysicalType::ByteArray, + ), + null_count: Some(0), + distinct_count: None, + min_value: Some(vec![4, 5]), + max_value: None, + }, + ]; + let a = reduce_binary(iter.iter()); + + assert_eq!( + a, + BinaryStatistics { + primitive_type: PrimitiveType::from_physical( + "bla".to_string(), + PhysicalType::ByteArray, + ), + null_count: Some(0), + distinct_count: None, + min_value: Some(vec![1, 2]), + max_value: Some(vec![3, 4]), + }, + ); + + Ok(()) + } + + #[test] + fn fixed_len_binary() -> Result<()> { + let iter = vec![ + FixedLenStatistics { + primitive_type: PrimitiveType::from_physical( + "bla".to_string(), + PhysicalType::FixedLenByteArray(2), + ), + null_count: Some(0), + distinct_count: None, + min_value: Some(vec![1, 2]), + max_value: Some(vec![3, 4]), + }, + FixedLenStatistics { + primitive_type: PrimitiveType::from_physical( + "bla".to_string(), + PhysicalType::FixedLenByteArray(2), + ), + null_count: Some(0), + distinct_count: None, + min_value: Some(vec![4, 5]), + max_value: None, + }, + ]; + let a = reduce_fix_len_binary(iter.iter()); + + assert_eq!( + a, + FixedLenStatistics { + primitive_type: PrimitiveType::from_physical( + "bla".to_string(), + PhysicalType::FixedLenByteArray(2), + ), + null_count: Some(0), + distinct_count: None, + min_value: Some(vec![1, 2]), + max_value: Some(vec![3, 4]), + }, + ); + + Ok(()) + } + + #[test] + fn boolean() -> Result<()> { + let iter = vec![ + BooleanStatistics { + null_count: Some(0), + distinct_count: None, + min_value: Some(false), + max_value: Some(false), + }, + BooleanStatistics { + null_count: Some(0), + distinct_count: None, + min_value: Some(true), + max_value: Some(true), + }, + ]; + let a = 
reduce_boolean(iter.iter()); + + assert_eq!( + a, + BooleanStatistics { + null_count: Some(0), + distinct_count: None, + min_value: Some(false), + max_value: Some(true), + }, + ); + + Ok(()) + } + + #[test] + fn primitive() -> Result<()> { + let iter = vec![PrimitiveStatistics { + null_count: Some(2), + distinct_count: None, + min_value: Some(30), + max_value: Some(70), + primitive_type: PrimitiveType::from_physical("bla".to_string(), PhysicalType::Int32), + }]; + let a = reduce_primitive(iter.iter()); + + assert_eq!( + a, + PrimitiveStatistics { + null_count: Some(2), + distinct_count: None, + min_value: Some(30), + max_value: Some(70), + primitive_type: PrimitiveType::from_physical( + "bla".to_string(), + PhysicalType::Int32, + ), + }, + ); + + Ok(()) + } +} diff --git a/crates/polars-parquet/src/parquet/write/stream.rs b/crates/polars-parquet/src/parquet/write/stream.rs new file mode 100644 index 000000000000..5ef3b32d2844 --- /dev/null +++ b/crates/polars-parquet/src/parquet/write/stream.rs @@ -0,0 +1,192 @@ +use std::io::Write; + +use futures::{AsyncWrite, AsyncWriteExt}; +use parquet_format_safe::thrift::protocol::TCompactOutputStreamProtocol; +use parquet_format_safe::{FileMetaData, RowGroup}; + +use super::row_group::write_row_group_async; +use super::{RowGroupIter, WriteOptions}; +use crate::parquet::error::{Error, Result}; +use crate::parquet::metadata::{KeyValue, SchemaDescriptor}; +use crate::parquet::write::indexes::{write_column_index_async, write_offset_index_async}; +use crate::parquet::write::page::PageWriteSpec; +use crate::parquet::write::State; +use crate::parquet::{FOOTER_SIZE, PARQUET_MAGIC}; + +async fn start_file(writer: &mut W) -> Result { + writer.write_all(&PARQUET_MAGIC).await?; + Ok(PARQUET_MAGIC.len() as u64) +} + +async fn end_file( + mut writer: &mut W, + metadata: FileMetaData, +) -> Result { + // Write file metadata + let mut protocol = TCompactOutputStreamProtocol::new(&mut writer); + let metadata_len = metadata.write_to_out_stream_protocol(&mut protocol).await? as i32; + + // Write footer + let metadata_bytes = metadata_len.to_le_bytes(); + let mut footer_buffer = [0u8; FOOTER_SIZE as usize]; + (0..4).for_each(|i| { + footer_buffer[i] = metadata_bytes[i]; + }); + + (&mut footer_buffer[4..]).write_all(&PARQUET_MAGIC)?; + writer.write_all(&footer_buffer).await?; + writer.flush().await?; + Ok(metadata_len as u64 + FOOTER_SIZE) +} + +/// An interface to write a parquet file asynchronously. +/// Use `start` to write the header, `write` to write a row group, +/// and `end` to write the footer. +pub struct FileStreamer { + writer: W, + schema: SchemaDescriptor, + options: WriteOptions, + created_by: Option, + + offset: u64, + row_groups: Vec, + page_specs: Vec>>, + /// Used to store the current state for writing the file + state: State, +} + +// Accessors +impl FileStreamer { + /// The options assigned to the file + pub fn options(&self) -> &WriteOptions { + &self.options + } + + /// The [`SchemaDescriptor`] assigned to this file + pub fn schema(&self) -> &SchemaDescriptor { + &self.schema + } +} + +impl FileStreamer { + /// Returns a new [`FileStreamer`]. + pub fn new( + writer: W, + schema: SchemaDescriptor, + options: WriteOptions, + created_by: Option, + ) -> Self { + Self { + writer, + schema, + options, + created_by, + offset: 0, + row_groups: vec![], + page_specs: vec![], + state: State::Initialised, + } + } + + /// Writes the header of the file. + /// + /// This is automatically called by [`Self::write`] if not called following [`Self::new`]. 
+    ///
+    /// # Errors
+    /// Returns an error if data has been written to the file.
+    async fn start(&mut self) -> Result<()> {
+        if self.offset == 0 {
+            self.offset = start_file(&mut self.writer).await? as u64;
+            self.state = State::Started;
+            Ok(())
+        } else {
+            Err(Error::InvalidParameter(
+                "Start cannot be called twice".to_string(),
+            ))
+        }
+    }
+
+    /// Writes a row group to the file.
+    pub async fn write<E>(&mut self, row_group: RowGroupIter<'_, E>) -> Result<()>
+    where
+        Error: From<E>,
+        E: std::error::Error,
+    {
+        if self.offset == 0 {
+            self.start().await?;
+        }
+
+        let ordinal = self.row_groups.len();
+        let (group, specs, size) = write_row_group_async(
+            &mut self.writer,
+            self.offset,
+            self.schema.columns(),
+            row_group,
+            ordinal,
+        )
+        .await?;
+        self.offset += size;
+        self.row_groups.push(group);
+        self.page_specs.push(specs);
+        Ok(())
+    }
+
+    /// Writes the footer of the parquet file. Returns the total size of the file and the
+    /// underlying writer.
+    pub async fn end(&mut self, key_value_metadata: Option<Vec<KeyValue>>) -> Result<u64> {
+        if self.offset == 0 {
+            self.start().await?;
+        }
+
+        if self.state != State::Started {
+            return Err(Error::InvalidParameter(
+                "End cannot be called twice".to_string(),
+            ));
+        }
+        // compute file stats
+        let num_rows = self.row_groups.iter().map(|group| group.num_rows).sum();
+
+        if self.options.write_statistics {
+            // write column indexes (require page statistics)
+            for (group, pages) in self.row_groups.iter_mut().zip(self.page_specs.iter()) {
+                for (column, pages) in group.columns.iter_mut().zip(pages.iter()) {
+                    let offset = self.offset;
+                    column.column_index_offset = Some(offset as i64);
+                    self.offset += write_column_index_async(&mut self.writer, pages).await?;
+                    let length = self.offset - offset;
+                    column.column_index_length = Some(length as i32);
+                }
+            }
+        };
+
+        // write offset index
+        for (group, pages) in self.row_groups.iter_mut().zip(self.page_specs.iter()) {
+            for (column, pages) in group.columns.iter_mut().zip(pages.iter()) {
+                let offset = self.offset;
+                column.offset_index_offset = Some(offset as i64);
+                self.offset += write_offset_index_async(&mut self.writer, pages).await?;
+                column.offset_index_length = Some((self.offset - offset) as i32);
+            }
+        }
+
+        let metadata = FileMetaData::new(
+            self.options.version.into(),
+            self.schema.clone().into_thrift(),
+            num_rows,
+            self.row_groups.clone(),
+            key_value_metadata,
+            self.created_by.clone(),
+            None,
+            None,
+            None,
+        );
+
+        let len = end_file(&mut self.writer, metadata).await?;
+        Ok(self.offset + len)
+    }
+
+    /// Returns the underlying writer.
+    pub fn into_inner(self) -> W {
+        self.writer
+    }
+}
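The `FileStreamer` above is driven as `new`, then `write` once per row group (`start` is invoked lazily on the first write), then `end`, then `into_inner`. The sketch below is not part of the patch; it only illustrates the 8-byte footer layout that `end_file` assembles, assuming the crate's `PARQUET_MAGIC` and `FOOTER_SIZE` constants carry the standard parquet values `b"PAR1"` and 8.

// Standalone sketch of the parquet footer: little-endian i32 metadata length, then the magic.
const PARQUET_MAGIC: [u8; 4] = *b"PAR1";
const FOOTER_SIZE: usize = 8;

fn footer(metadata_len: i32) -> [u8; FOOTER_SIZE] {
    let mut buf = [0u8; FOOTER_SIZE];
    // length of the thrift-encoded FileMetaData that precedes the footer
    buf[..4].copy_from_slice(&metadata_len.to_le_bytes());
    buf[4..].copy_from_slice(&PARQUET_MAGIC);
    buf
}

fn main() {
    assert_eq!(footer(1024), [0, 4, 0, 0, b'P', b'A', b'R', b'1']);
}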
diff --git a/crates/polars-utils/Cargo.toml b/crates/polars-utils/Cargo.toml
index c017f731739c..56a56493dce3 100644
--- a/crates/polars-utils/Cargo.toml
+++ b/crates/polars-utils/Cargo.toml
@@ -14,6 +14,7 @@ polars-error = { workspace = true }
 ahash = { workspace = true }
 bytemuck = { workspace = true }
 hashbrown = { workspace = true }
+indexmap = { workspace = true }
 num-traits = { workspace = true }
 once_cell = { workspace = true }
 rayon = { workspace = true }
diff --git a/crates/polars-utils/src/aliases.rs b/crates/polars-utils/src/aliases.rs
index a2ca71d1ff47..5ecb1b0033d9 100644
--- a/crates/polars-utils/src/aliases.rs
+++ b/crates/polars-utils/src/aliases.rs
@@ -2,3 +2,60 @@ use ahash::RandomState;
 
 pub type PlHashMap<K, V> = hashbrown::HashMap<K, V, RandomState>;
 pub type PlHashSet<V> = hashbrown::HashSet<V, RandomState>;
+pub type PlIndexMap<K, V> = indexmap::IndexMap<K, V, RandomState>;
+pub type PlIndexSet<K> = indexmap::IndexSet<K, RandomState>;
+
+pub trait InitHashMaps {
+    type HashMap;
+
+    fn new() -> Self::HashMap;
+
+    fn with_capacity(capacity: usize) -> Self::HashMap;
+}
+
+impl<K, V> InitHashMaps for PlHashMap<K, V> {
+    type HashMap = Self;
+
+    fn new() -> Self::HashMap {
+        Self::with_capacity_and_hasher(0, Default::default())
+    }
+
+    fn with_capacity(capacity: usize) -> Self {
+        Self::with_capacity_and_hasher(capacity, Default::default())
+    }
+}
+impl<K> InitHashMaps for PlHashSet<K> {
+    type HashMap = Self;
+
+    fn new() -> Self::HashMap {
+        Self::with_capacity_and_hasher(0, Default::default())
+    }
+
+    fn with_capacity(capacity: usize) -> Self {
+        Self::with_capacity_and_hasher(capacity, Default::default())
+    }
+}
+
+impl<K> InitHashMaps for PlIndexSet<K> {
+    type HashMap = Self;
+
+    fn new() -> Self::HashMap {
+        Self::with_capacity_and_hasher(0, Default::default())
+    }
+
+    fn with_capacity(capacity: usize) -> Self::HashMap {
+        Self::with_capacity_and_hasher(capacity, Default::default())
+    }
+}
+
+impl<K, V> InitHashMaps for PlIndexMap<K, V> {
+    type HashMap = Self;
+
+    fn new() -> Self::HashMap {
+        Self::with_capacity_and_hasher(0, Default::default())
+    }
+
+    fn with_capacity(capacity: usize) -> Self::HashMap {
+        Self::with_capacity_and_hasher(capacity, Default::default())
+    }
+}
diff --git a/py-polars/Cargo.lock b/py-polars/Cargo.lock
index 1c5c1d9140c6..08f6e965944a 100644
--- a/py-polars/Cargo.lock
+++ b/py-polars/Cargo.lock
@@ -1475,25 +1475,6 @@ dependencies = [
  "futures",
 ]
 
-[[package]]
-name = "parquet2"
-version = "0.17.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "579fe5745f02cef3d5f236bfed216fd4693e49e4e920a13475c6132233283bce"
-dependencies = [
- "async-stream",
- "brotli",
- "flate2",
- "futures",
- "lz4",
- "parquet-format-safe",
- "seq-macro",
- "snap",
- "streaming-decompression",
- "xxhash-rust",
- "zstd 0.12.4",
-]
-
 [[package]]
 name = "parse-zoneinfo"
 version = "0.3.0"
@@ -1668,7 +1649,6 @@ dependencies = [
  "arrow-format",
  "avro-schema",
  "object_store",
- "parquet2",
  "regex",
  "simdutf8",
  "thiserror",
@@ -1806,14 +1786,23 @@ name = "polars-parquet"
 version = "0.34.2"
 dependencies = [
  "ahash",
+ "async-stream",
  "base64",
+ "brotli",
  "ethnum",
+ "flate2",
  "futures",
+ "lz4",
  "num-traits",
- "parquet2",
+ "parquet-format-safe",
  "polars-arrow",
  "polars-error",
+ "polars-utils",
+ "seq-macro",
  "simdutf8",
+ "snap",
+ "streaming-decompression",
+ "zstd 0.12.4",
 ]
 
 [[package]]
@@ -1917,6 +1906,7 @@ dependencies = [
  "ahash",
  "bytemuck",
  "hashbrown 0.14.0",
+ "indexmap 2.0.0",
  "num-traits",
  "once_cell",
  "polars-error",
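The new `PlIndexMap`/`PlIndexSet` aliases added above mirror `PlHashMap`/`PlHashSet` but preserve insertion order, and `InitHashMaps` supplies `new`/`with_capacity` constructors that seed the ahash `RandomState`, which indexmap's own constructors (tied to its default hasher) do not. A small usage sketch, not part of the patch, assuming the aliases are exposed at `polars_utils::aliases` and a dependency on the polars-utils crate:

use polars_utils::aliases::{InitHashMaps, PlIndexMap};

fn main() {
    // `with_capacity` resolves to the new `InitHashMaps` trait and builds the map
    // with the ahash-based `RandomState` used across Polars.
    let mut columns: PlIndexMap<String, usize> = PlIndexMap::with_capacity(2);
    columns.insert("a".to_string(), 0);
    columns.insert("b".to_string(), 1);
    // Unlike `PlHashMap`, iteration follows insertion order.
    let names: Vec<String> = columns.keys().cloned().collect();
    assert_eq!(names, ["a", "b"]);
}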