From d83b1b0a481a260a0b7eeb3b41e5ef24adc08160 Mon Sep 17 00:00:00 2001 From: Sylwia Mielnicka Date: Tue, 3 Mar 2020 07:10:11 +0100 Subject: [PATCH 1/2] Update histograms.md Added `Accessing the y-axis values` section --- doc/python/histograms.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/doc/python/histograms.md b/doc/python/histograms.md index 76bc94d1d7d..8f003ed869a 100644 --- a/doc/python/histograms.md +++ b/doc/python/histograms.md @@ -69,6 +69,32 @@ fig = px.histogram(df, x="total_bill", nbins=20) fig.show() ``` +#### Accessing the y-axis values + +JavaScript calculates the y-axis (count) values on the fly in the browser, so it's not accessible in the `fig`. You can manually calculate it using `pandas.cut` or `np.digitize`. + +```python +import plotly.express as px +import pandas as pd + +df = px.data.tips() + +# create the bins; use the `range` to get the same bin size as in the histogram in the previous section. +df["total_bill_bins"] = pd.cut(df.total_bill, bins=range(0, 60, 5), right=False) + +# calculate counts +df_counts = df.pivot_table(index="total_bill_bins", values="size", aggfunc='count').reset_index() +df_counts.rename(columns={"size": "count"}, inplace=True) + +# sort, then convert to string +df_counts = df_counts.sort_values(by="total_bill_bins") +df_counts["total_bill_bins"] = df_counts["total_bill_bins"].astype(str) + +# display calculated counts on the bar chart +fig = px.bar(df_counts, x="total_bill_bins", y="count") +fig.show() +``` + #### Type of normalization The default mode is to represent the count of samples in each bin. With the `histnorm` argument, it is also possible to represent the percentage or fraction of samples in each bin (`histnorm='percent'` or `probability`), or a density histogram (the sum of bars is equal to 100, `density`), or a probability density histogram (sum equal to 1, `probability density`). 
From 3acbd3c6dc03b283c8ead47d0c560bee2394994d Mon Sep 17 00:00:00 2001 From: Sylwia Mielnicka Date: Fri, 6 Mar 2020 11:08:34 +0100 Subject: [PATCH 2/2] Update histograms.md 'Accessing the counts (y-axis) values' - use example with numpy instead of pandas --- doc/python/histograms.md | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/doc/python/histograms.md b/doc/python/histograms.md index 8f003ed869a..c558ba5ffa1 100644 --- a/doc/python/histograms.md +++ b/doc/python/histograms.md @@ -69,29 +69,20 @@ fig = px.histogram(df, x="total_bill", nbins=20) fig.show() ``` -#### Accessing the y-axis values +#### Accessing the counts (y-axis) values -JavaScript calculates the y-axis (count) values on the fly in the browser, so it's not accessible in the `fig`. You can manually calculate it using `pandas.cut` or `np.digitize`. +JavaScript calculates the y-axis (count) values on the fly in the browser, so they are not accessible in the `fig`. You can manually calculate them using `np.histogram`. ```python import plotly.express as px -import pandas as pd +import numpy as np df = px.data.tips() +# create the bins +counts, bins = np.histogram(df.total_bill, bins=range(0, 60, 5)) +bins = 0.5 * (bins[:-1] + bins[1:]) -# create the bins; use the `range` to get the same bin size as in the histogram in the previous section. -df["total_bill_bins"] = pd.cut(df.total_bill, bins=range(0, 60, 5), right=False) - -# calculate counts -df_counts = df.pivot_table(index="total_bill_bins", values="size", aggfunc='count').reset_index() -df_counts.rename(columns={"size": "count"}, inplace=True) - -# sort, then convert to string -df_counts = df_counts.sort_values(by="total_bill_bins") -df_counts["total_bill_bins"] = df_counts["total_bill_bins"].astype(str) - -# display calculated counts on the bar chart -fig = px.bar(df_counts, x="total_bill_bins", y="count") +fig = px.bar(x=bins, y=counts, labels={'x':'total_bill', 'y':'count'}) fig.show() ```