Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rust data loader example #1452

Merged
merged 5 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
- [`loader-r-to-csv`](https://observablehq.observablehq.cloud/framework-example-loader-r-to-csv/) - Generating CSV from R
- [`loader-r-to-jpeg`](https://observablehq.observablehq.cloud/framework-example-loader-r-to-jpeg/) - Generating JPEG from R
- [`loader-r-to-json`](https://observablehq.observablehq.cloud/framework-example-loader-r-to-json/) - Generating JSON from R
- [`loader-rust-to-json`](https://observablehq.observablehq.cloud/framework-example-loader-rust-to-json/) - Generating JSON from Rust
- [`loader-snowflake`](https://observablehq.observablehq.cloud/framework-example-loader-snowflake/) - Loading data from Snowflake
- [`netcdf-contours`](https://observablehq.observablehq.cloud/framework-example-netcdf-contours/) - Converting NetCDF to GeoJSON with `netcdfjs` and `d3-geo-voronoi`

Expand Down
4 changes: 4 additions & 0 deletions examples/loader-rust-to-json/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.DS_Store
/dist/
node_modules/
yarn-error.log
9 changes: 9 additions & 0 deletions examples/loader-rust-to-json/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[Framework examples →](../)

# Rust data loader to generate JSON

View live: <https://observablehq.observablehq.cloud/framework-example-loader-rust-to-json/>

This Observable Framework example demonstrates how to write a data loader in Rust that runs a Monte Carlo simulation of poker hands, calculates statistics about how often each category of hand was found, then outputs JSON.

The data loader lives in [`src/data/poker.json.rs`](./src/data/poker.json.rs).
3 changes: 3 additions & 0 deletions examples/loader-rust-to-json/observablehq.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Observable Framework configuration for this example project.
export default {
// Sets the `root` option to "src", the directory that holds this example's
// data loader (src/data/poker.json.rs).
// NOTE(review): presumably this is the source root Framework builds from; confirm against the Framework config docs.
root: "src"
};
20 changes: 20 additions & 0 deletions examples/loader-rust-to-json/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"type": "module",
"private": true,
"scripts": {
"clean": "rimraf src/.observablehq/cache",
"build": "rimraf dist && observable build",
"dev": "observable preview",
"deploy": "observable deploy",
"observable": "observable"
},
"dependencies": {
"@observablehq/framework": "^1.8.0"
},
"devDependencies": {
"rimraf": "^5.0.5"
},
"engines": {
"node": ">=18"
}
}
1 change: 1 addition & 0 deletions examples/loader-rust-to-json/src/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/.observablehq/cache/
141 changes: 141 additions & 0 deletions examples/loader-rust-to-json/src/data/poker-ts.json.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import {max, min, rollup, sort} from "d3-array";

/**
 * Simulates COUNT random poker hands, tallies how many fall into each hand
 * category, and writes a JSON document to stdout with two keys:
 * `summary` (an array of `{category, count}` rows sorted by category name)
 * and `meta` (the number of hands simulated and the elapsed milliseconds).
 */
function main() {
  const COUNT = 100_000;
  const start = performance.now();

  // Tally the category of each random hand directly into one lookup object.
  const counts: Record<string, number> = {};
  for (let i = 0; i < COUNT; i += 1) {
    const category = Hand.random().categorize();
    counts[category] = (counts[category] ?? 0) + 1;
  }

  // Reshape the tally into one row per category, ordered by category name.
  const rows = Object.entries(counts).map(([category, count]) => ({category, count}));
  const tidyData = sort(rows, (row) => row.category);

  const report = {
    summary: tidyData,
    meta: {count: COUNT, duration_ms: performance.now() - start}
  };
  process.stdout.write(JSON.stringify(report));
}

// Here, we create types for the domain model of a poker hand. Working with
// specific types helps make the rest of the code simpler.

/**
 * A poker hand. `Hand.random()` always produces five distinct cards;
 * `categorize()` assumes a five-card hand.
 */
class Hand {
  constructor(public cards: Card[]) {}

  /**
   * Generate a random 5 card hand.
   *
   * Each draw picks a uniformly random rank (1-13) and suit; a draw that
   * repeats a card already in the hand is discarded and retried, so the
   * returned hand never contains duplicates.
   */
  static random(): Hand {
    const cards: Card[] = [];
    while (cards.length < 5) {
      const rank = Math.floor(Math.random() * 13 + 1);
      const suitRand = Math.random();
      const suit =
        suitRand < 0.25 ? Suit.Clubs : suitRand < 0.5 ? Suit.Diamonds : suitRand < 0.75 ? Suit.Hearts : Suit.Spades;
      const card = {rank, suit};
      if (cards.some((c) => c.rank === card.rank && c.suit === card.suit)) {
        continue;
      }
      cards.push(card);
    }
    return new Hand(cards);
  }

  /**
   * Classify this hand into the best poker category it satisfies.
   *
   * @returns the matching `HandCategory`, checked from strongest
   *   (StraightFlush) to weakest (HighCard).
   */
  categorize(): HandCategory {
    const rankCounts = rollup(
      this.cards,
      (ds) => ds.length,
      (d) => d.rank
    );
    // Group by suit (not rank): a flush is a hand whose cards share one suit.
    const suitCounts = rollup(
      this.cards,
      (ds) => ds.length,
      (d) => d.suit
    );

    const isFlush = suitCounts.size === 1;

    // A straight needs every rank to be different. Without this guard a hand
    // such as 2-2-3-4-6 spans exactly four ranks and would be misreported as
    // a straight instead of a pair.
    const allRanksDistinct = rankCounts.size === this.cards.length;

    let isStraight = false;
    if (allRanksDistinct) {
      if (rankCounts.has(1)) {
        // Aces can be low (A-2-3-4-5) or high (10-J-Q-K-A), so look at the
        // span of the remaining cards.
        const others = this.cards.filter((c) => c.rank !== 1);
        const minRank = min(others, (d) => d.rank);
        const maxRank = max(others, (d) => d.rank);
        isStraight = (minRank === 2 && maxRank === 5) || (minRank === 10 && maxRank === 13);
      } else {
        const minRank = min(this.cards, (d) => d.rank);
        const maxRank = max(this.cards, (d) => d.rank);
        isStraight =
          minRank !== undefined && maxRank !== undefined && maxRank - minRank === this.cards.length - 1;
      }
    }

    // How many cards share each rank, e.g. [3, 2] for a full house.
    const groups = Array.from(rankCounts.values());
    const hasGroup = (size: number) => groups.includes(size);
    const pairCount = groups.filter((count) => count === 2).length;

    if (isFlush && isStraight) {
      return HandCategory.StraightFlush;
    } else if (hasGroup(4)) {
      return HandCategory.FourOfAKind;
    } else if (hasGroup(3) && hasGroup(2)) {
      return HandCategory.FullHouse;
    } else if (isFlush) {
      return HandCategory.Flush;
    } else if (isStraight) {
      return HandCategory.Straight;
    } else if (hasGroup(3)) {
      return HandCategory.ThreeOfAKind;
    } else if (pairCount === 2) {
      return HandCategory.TwoPair;
    } else if (pairCount === 1) {
      return HandCategory.OnePair;
    } else {
      return HandCategory.HighCard;
    }
  }
}

/**
 * A single playing card. `Hand.random()` generates ranks from 1 to 13, and
 * `Hand.categorize()` treats rank 1 as the ace.
 */
type Card = {rank: number; suit: Suit};

/** The four card suits; `Hand.random()` picks each with equal probability. */
enum Suit {
Clubs,
Diamonds,
Hearts,
Spades
}

/**
 * The categories `Hand.categorize()` can return. Each member's string value
 * is used as the tally key in `main()` and appears as the `category` field
 * of the emitted JSON.
 */
enum HandCategory {
HighCard = "HighCard",
OnePair = "OnePair",
TwoPair = "TwoPair",
ThreeOfAKind = "ThreeOfAKind",
Straight = "Straight",
Flush = "Flush",
FullHouse = "FullHouse",
FourOfAKind = "FourOfAKind",
StraightFlush = "StraightFlush"
}

// Run the simulation; main() writes the resulting JSON to stdout.
main();
162 changes: 162 additions & 0 deletions examples/loader-rust-to-json/src/data/poker.json.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
//! Since Framework uses rust-script, we can define dependencies here.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is great!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this is really cool

//!
//! ```cargo
//! [dependencies]
//! serde = { version = "1.0.203", features = ["derive"] }
//! serde_json = "1.0.117"
//! rand = "0.8.5"
//! rayon = "1.10.0"
//! ```

use rand::Rng;
use rayon::prelude::*;
use serde::Serialize;
use serde_json::json;
use std::collections::HashMap;

fn main() {
const COUNT: u32 = 10_000_000;
let start = std::time::Instant::now();

let counts = (0..COUNT)
// This line breaks the work up into multiple parallel jobs.
.into_par_iter()
// Calculate the category of random hands
.map(|_| {
let hand = Hand::random();
// Convert the category into a one-element hashmap, so the reducer
// can sum up all the counts for each category.
let mut map = HashMap::new();
map.insert(hand.categorize(), 1);
map
})
// count up each category
.reduce(
|| HashMap::with_capacity(10),
|mut acc, map| {
for (category, count) in map {
*acc.entry(category).or_insert(0) += count;
}
acc
},
);

let mut tidy_data = counts
.into_iter()
.map(|(category, count)| SummaryRow { category, count })
.collect::<Vec<_>>();
tidy_data.sort_by_key(|data| data.category);

serde_json::to_writer(std::io::stdout(), &json!({
"summary": tidy_data,
"meta": { "count": COUNT, "duration_ms": start.elapsed().as_millis() },
})).unwrap();
}

// Here, we create types for the domain model of a poker hand. Working with
// specific types helps make the rest of the code simpler.

/// One row of the `summary` array in the JSON output: a hand category and
/// the number of simulated hands that fell into it.
#[derive(Debug, Clone, Serialize)]
struct SummaryRow {
/// The hand category this row tallies.
category: HandCategory,
/// How many simulated hands were classified into `category`.
count: u32,
}

/// A poker hand, stored as its list of cards. `Hand::random` always builds
/// a hand of five distinct cards.
#[derive(Debug, PartialEq, Clone, Serialize)]
struct Hand(Vec<Card>);

/// A single playing card, identified by its rank and suit.
#[derive(Debug, PartialEq, Clone, Copy, Serialize)]
struct Card {
/// 1 is an Ace, 2-10 are the numbered cards, 11 is Jack, 12 is Queen, 13 is King.
rank: u8,
/// The suit this card belongs to.
suit: Suit,
}

/// The four card suits. `Hash` and `Eq` are derived so that a suit can be
/// used as a `HashMap` key when counting the suits in a hand.
#[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize, Hash)]
enum Suit {
Clubs,
Diamonds,
Hearts,
Spades,
}

/// The categories a hand can be classified into.
///
/// The declaration order is significant: the derived `Ord` follows it, and
/// `main` sorts the summary rows by category, so the rows are emitted in the
/// order the variants are listed here.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Serialize)]
enum HandCategory {
HighCard,
OnePair,
TwoPair,
ThreeOfAKind,
Straight,
Flush,
FullHouse,
FourOfAKind,
StraightFlush,
}

// With the data domain specified, we can write the logic to generate hands and categorize them.

impl Hand {
/// Generate a random 5 card hand
fn random() -> Self {
let mut rng = rand::thread_rng();
let mut cards = Vec::with_capacity(5);
while cards.len() < 5 {
let rank = rng.gen_range(1..=13);
let suit = match rng.gen_range(0..4) {
0 => Suit::Clubs,
1 => Suit::Diamonds,
2 => Suit::Hearts,
3 => Suit::Spades,
_ => unreachable!(),
};
let card = Card { rank, suit };
if cards.iter().any(|&c| c == card) { continue };
cards.push(card);
}
Self(cards)
}

fn categorize(&self) -> HandCategory {
let rank_counts = self.0.iter().fold(HashMap::new(), |mut acc, card| {
*acc.entry(card.rank).or_insert(0) += 1;
acc
});
let suit_counts = self.0.iter().fold(HashMap::new(), |mut acc, card| {
*acc.entry(card.suit).or_insert(0) += 1;
acc
});
let is_flush = suit_counts.len() == 1;
let is_straight = if self.0.iter().any(|card| card.rank == 1) {
// Handle aces
let min_rank = self.0.iter().map(|card| card.rank).filter(|&rank| rank != 1).min().unwrap();
let max_rank = self.0.iter().map(|card| card.rank).filter(|&rank| rank != 1).max().unwrap();
(min_rank == 2 && max_rank == 5) || (min_rank == 10 && max_rank == 13)
} else {
let min_rank = self.0.iter().map(|card| card.rank).min().unwrap();
let max_rank = self.0.iter().map(|card| card.rank).max().unwrap();
(max_rank - min_rank) as usize == self.0.len() - 1
};

if is_flush && is_straight {
HandCategory::StraightFlush
} else if rank_counts.values().any(|&count| count == 4) {
HandCategory::FourOfAKind
} else if rank_counts.values().any(|&count| count == 3)
&& rank_counts.values().any(|&count| count == 2)
{
HandCategory::FullHouse
} else if is_flush {
HandCategory::Flush
} else if is_straight {
HandCategory::Straight
} else if rank_counts.values().any(|&count| count == 3) {
HandCategory::ThreeOfAKind
} else if rank_counts.values().filter(|&&count| count == 2).count() == 2 {
HandCategory::TwoPair
} else if rank_counts.values().any(|&count| count == 2) {
HandCategory::OnePair
} else {
HandCategory::HighCard
}
}
}
Loading