# Pandas 相关矩阵

Suraj Joshi 2024年2月15日

``````python
import pandas as pd

# Sample employee records, keyed by column name; every column has six entries.
employee_columns = {
    "Name": ["Jonathan", "Will", "Michael", "Liva", "Sia", "Alice"],
    "Age": [20, 22, 29, 20, 20, 21],
    "Weight(KG)": [65, 75, 80, 60, 63, 70],
    "Height(meters)": [1.6, 1.7, 1.85, 1.69, 1.8, 1.75],
    "Salary(\$)": [3200, 3500, 4000, 2090, 2500, 3600],
}
employees_df = pd.DataFrame(employee_columns)

# The extra "\n" argument leaves a blank line after the printed table.
print(employees_df, "\n")
``````

``````text
       Name  Age  Weight(KG)  Height(meters)  Salary(\$)
0  Jonathan   20          65            1.60       3200
1      Will   22          75            1.70       3500
2   Michael   29          80            1.85       4000
3      Liva   20          60            1.69       2090
4       Sia   20          63            1.80       2500
5     Alice   21          70            1.75       3600
``````

## 使用 `DataFrame.corr()` 方法生成相关矩阵

``````python
import pandas as pd

# Sample employee records: one text column ("Name") and four numeric columns.
employees_df = pd.DataFrame(
    {
        "Name": ["Jonathan", "Will", "Michael", "Liva", "Sia", "Alice"],
        "Age": [20, 22, 29, 20, 20, 21],
        "Weight(KG)": [65, 75, 80, 60, 63, 70],
        "Height(meters)": [1.6, 1.7, 1.85, 1.69, 1.8, 1.75],
        "Salary(\$)": [3200, 3500, 4000, 2090, 2500, 3600],
    }
)

print("The DataFrame of Employees is:")
print(employees_df, "\n")

# "Name" holds strings. Since pandas 2.0, corr() no longer drops non-numeric
# columns silently and raises instead, so keep only the numeric columns
# before computing the pairwise (default: Pearson) correlations.
corr_df = employees_df.select_dtypes(include="number").corr()
print("The correlation DataFrame is:")
print(corr_df, "\n")
``````

``````text
The DataFrame of Employees is:
       Name  Age  Weight(KG)  Height(meters)  Salary(\$)
0  Jonathan   20          65            1.60       3200
1      Will   22          75            1.70       3500
2   Michael   29          80            1.85       4000
3      Liva   20          60            1.69       2090
4       Sia   20          63            1.80       2500
5     Alice   21          70            1.75       3600

The correlation DataFrame is:
                     Age  Weight(KG)  Height(meters)  Salary(\$)
Age             1.000000    0.848959        0.655252   0.695206
Weight(KG)      0.848959    1.000000        0.480998   0.914861
Height(meters)  0.655252    0.480998        1.000000   0.285423
Salary(\$)       0.695206    0.914861        0.285423   1.000000
``````

## 使用 `matplotlib.pyplot.matshow()` 方法可视化 Pandas 相关矩阵

``````python
import pandas as pd
import matplotlib.pyplot as plt

# Sample employee records: one text column ("Name") and four numeric columns.
employees_df = pd.DataFrame(
    {
        "Name": ["Jonathan", "Will", "Michael", "Liva", "Sia", "Alice"],
        "Age": [20, 22, 29, 20, 20, 21],
        "Weight(KG)": [65, 75, 80, 60, 63, 70],
        "Height(meters)": [1.6, 1.7, 1.85, 1.69, 1.8, 1.75],
        "Salary(\$)": [3200, 3500, 4000, 2090, 2500, 3600],
    }
)

# Restrict to numeric columns first: with the string column "Name" present,
# corr() raises on pandas 2.0+ instead of skipping it.
corr_df = employees_df.select_dtypes(include="number").corr(method="pearson")

# Draw the correlation matrix as a color-coded grid, one cell per column pair.
plt.matshow(corr_df)
# Display the figure.
plt.show()
``````

## 使用 `seaborn.heatmap()` 方法可视化 Pandas 相关矩阵

``````python
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Sample employee records: one text column ("Name") and four numeric columns.
employees_df = pd.DataFrame(
    {
        "Name": ["Jonathan", "Will", "Michael", "Liva", "Sia", "Alice"],
        "Age": [20, 22, 29, 20, 20, 21],
        "Weight(KG)": [65, 75, 80, 60, 63, 70],
        "Height(meters)": [1.6, 1.7, 1.85, 1.69, 1.8, 1.75],
        "Salary(\$)": [3200, 3500, 4000, 2090, 2500, 3600],
    }
)

# Restrict to numeric columns first: with the string column "Name" present,
# corr() raises on pandas 2.0+ instead of skipping it.
corr_df = employees_df.select_dtypes(include="number").corr(method="pearson")

# Create the figure up front so the heatmap is drawn at 8x6 inches.
plt.figure(figsize=(8, 6))
# annot=True writes each correlation coefficient inside its cell.
sns.heatmap(corr_df, annot=True)
# Display the figure.
plt.show()
``````

## 使用 `DataFrame.style` 属性可视化相关矩阵

``````python
import pandas as pd

# Sample employee records: one text column ("Name") and four numeric columns.
employees_df = pd.DataFrame(
    {
        "Name": ["Jonathan", "Will", "Michael", "Liva", "Sia", "Alice"],
        "Age": [20, 22, 29, 20, 20, 21],
        "Weight(KG)": [65, 75, 80, 60, 63, 70],
        "Height(meters)": [1.6, 1.7, 1.85, 1.69, 1.8, 1.75],
        "Salary(\$)": [3200, 3500, 4000, 2090, 2500, 3600],
    }
)

# Restrict to numeric columns first: with the string column "Name" present,
# corr() raises on pandas 2.0+ instead of skipping it.
corr_df = employees_df.select_dtypes(include="number").corr(method="pearson")

``````

`corr_df` 这个 DataFrame 对象的 `style` 属性会返回一个 `Styler` 对象。我们可以使用 `Styler` 对象的 `background_gradient()` 方法，以颜色渐变的形式可视化该 DataFrame，例如在 Jupyter Notebook 中执行 `corr_df.style.background_gradient(cmap="coolwarm")`。