# 如何在 Matplotlib 直方图中手动设置 bin 的大小

Suraj Joshi 2023年1月30日 2020年6月9日 Matplotlib Matplotlib Histogram

## 将 bin 边界作为 `hist()` 函数的参数

``````python
hist(x,
     bins=None,
     range=None,
     density=None,
     weights=None,
     cumulative=False,
     bottom=None,
     histtype='bar',
     align='mid',
     orientation='vertical',
     rwidth=None,
     log=False,
     color=None,
     label=None,
     stacked=False,
     normed=None,
     data=None,
     **kwargs)
``````

``````python
import numpy as np
import numpy.random as random
import matplotlib.pyplot as plt

# Sample data: 100 uniform random values scaled to the 0-100 range.
data = 100.0 * np.random.random_sample(100)

# Explicit bin edges. The first six bins are 10 wide and the last two are
# 20 wide, showing that the edges passed to hist() need not be evenly spaced.
bin_edges = [0, 10, 20, 30, 40, 50, 60, 80, 100]
plt.hist(data, bins=bin_edges)

plt.title('Histogram Plot of Data')
plt.xlabel('Value')
plt.ylabel('Counts')
plt.grid(True)
plt.show()
``````

``````python
import numpy as np
import numpy.random as random
import matplotlib.pyplot as plt

# Width of every bin, in data units.
binwidth = 10

# Sample data: 100 uniform random values scaled to the 0-100 range.
data = 100.0 * np.random.random_sample(100)

# Evenly spaced edges starting at the smallest sample. np.arange excludes its
# stop value, so the stop is pushed one bin width past the largest sample.
bin_edges = np.arange(min(data), max(data) + binwidth, binwidth)
plt.hist(data, bins=bin_edges)

plt.title('Histogram Plot of Data')
plt.xlabel('Data')
plt.ylabel('Counts')
plt.grid(True)
plt.show()
``````

`np.arange` 的第二个参数应该是 `max(data) + binwidth`，而不是 `max(data)`，因为 `np.arange(start, stop, step)` 创建的序列包含 `start`，但不包含 `stop`。因此，我们需要在 `max(data)` 上再加上一个区间宽度 `binwidth`，以确保最后一个 bin 的右边界不小于 `max(data)`，从而覆盖所有数据。

## 从所需宽度计算 bin 的数量(箱数)

``````python
import numpy as np
import matplotlib.pyplot as plt

def find_bins(observations, width):
    """Return evenly spaced bin edges of the given width covering the data.

    The edges are aligned to multiples of ``width``: the first edge is the
    minimum rounded down to a multiple of ``width``, and the last edge is
    the maximum rounded down to a multiple of ``width`` plus one more
    ``width``, so the maximum always falls inside the last bin.

    Args:
        observations: Non-empty array-like of numeric values.
        width: Desired width of each bin; must be positive.

    Returns:
        A 1-D ``numpy.ndarray`` of bin edges spaced ``width`` apart.

    Raises:
        ValueError: If ``width`` is not a positive number.
    """
    # A zero width would produce NaN edges and a negative width an empty
    # edge array, so reject both up front with a clear message.
    if not width > 0:
        raise ValueError("width must be a positive number")

    minimum = np.min(observations)
    maximum = np.max(observations)

    # Snap the minimum down to the nearest multiple of width.
    bound_min = minimum - minimum % width
    # Snap the maximum down to a multiple of width, then add one more bin so
    # that the maximum lies inside the final bin.
    bound_max = maximum - maximum % width + width

    # The span is a whole number of widths up to float rounding error; round
    # instead of truncating so a quotient such as 2.9999999999999996 still
    # counts as 3 bins.
    n_bins = int(round((bound_max - bound_min) / width))

    # n_bins bins are delimited by n_bins + 1 edges.
    return np.linspace(bound_min, bound_max, n_bins + 1)

# Sample data: 120 uniform random values scaled to the 0-100 range.
data = 100 * np.random.random_sample(120)

# Bin edges 10 units wide, computed by find_bins from the data's extent.
bins = find_bins(data, 10.0)
plt.hist(data, bins=bins)

plt.title('Histogram Plot')
plt.xlabel('Data')
plt.ylabel('Counts')
plt.show()
``````

Author: Suraj Joshi

Suraj Joshi is a backend software engineer at Matrice.ai.