Skip to content

Commit

Permalink
analysis: better genre histogram
Browse files (browse the repository at this point in the history)
  • Loading branch information
mdeff committed Jun 17, 2020
1 parent 0ea2c9c commit 152a852
Showing 1 changed file with 21 additions and 27 deletions.
48 changes: 21 additions & 27 deletions analysis.ipynb
Original file line number | Diff line number | Diff line change
@@ -360,8 +360,8 @@
"ax2.legend(loc='upper right')\n",
"ax2.grid(False)\n",
"\n",
"plt.tight_layout()\n",
"plt.savefig('tag_distribution.pdf')"
"fig.tight_layout()\n",
"fig.savefig('tag_distribution.pdf')"
]
},
{
@@ -565,32 +565,26 @@
"metadata": {},
"outputs": [],
"source": [
"p = sns.distplot(tracks['track', 'genres'].map(len), kde=False, bins=range(32))\n",
"p = sns.distplot(tracks['track', 'genres_all'].map(len), kde=False, bins=range(32))\n",
"p.set_xlabel('#genres per track')\n",
"p.set_ylabel('#tracks');\n",
"#tracks['track', 'genres_all'].map(len).describe()\n",
"#tracks['track', 'genres_all'].map(len).value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Same as above, formatted for the paper.\n",
"plt.figure(figsize=(5, 4))\n",
"d = tracks['track', 'genres_all'].map(len)\n",
"p = sns.distplot(d[d.values <= 10], kde=False, bins=range(11), color='k', hist_kws=dict(alpha=0.4))\n",
"p.set_xlabel('#genres per track')\n",
"p.set_ylabel('#tracks');\n",
"p.set_xlim(0, 10)\n",
"p.set_xticks(range(11));\n",
"plt.tight_layout()\n",
"plt.savefig('genres_per_track.pdf')\n",
"# Genres per track.\n",
"labels = ['genres', 'genres_all'] #, 'genres_top']\n",
"d = [tracks['track', label].map(len) for label in labels]\n",
"labels = ['{}\\nmax: {}'.format(label, d1.max()) for label, d1 in zip(labels, d)]\n",
"\n",
"for l, d1 in zip(labels, d):\n",
" print('{} per track: from {} to {} tags'.format(l, d1.min(), d1.max()))\n",
"print('#tracks without genre: {}'.format((tracks['track', 'genres'].map(len) == 0).sum()))\n",
"\n",
"d.min(), d.max()"
"MAX = 9\n",
"fig, ax = plt.subplots(figsize=(5, 4))\n",
"ax.hist(d, bins=np.arange(MAX)-0.5, label=labels)\n",
"ax.set_xlabel('#genres per track')\n",
"ax.set_ylabel('#tracks')\n",
"ax.set_xlim(-0.5, MAX-1.5)\n",
"ax.set_xticks(range(MAX-1))\n",
"ax.set_yticklabels(['0'] + ['{}0k'.format(i) for i in range(1, 6)])\n",
"ax.legend(loc='upper right')\n",
"fig.tight_layout()\n",
"fig.savefig('genres_per_track.pdf')"
]
},
{
Expand Down

0 comments on commit 152a852

Please sign in to comment.