import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Load the penguins table used by the seaborn examples further down.
# NOTE: the path is relative, so it resolves against the current working directory.
penguins = pd.read_csv('../Data/penguins.csv')

Matplotlib：bar/barh¶

matplotlib.axes.Axes.bar — Matplotlib 3.7.0 documentation

matplotlib.axes.Axes.barh — Matplotlib 3.10.1 documentation

matplotlib.pyplot.barh — Matplotlib 3.10.1 documentation

bar(x, height, width=0.8, bottom=0, *, align='center', data=None, **kwargs)

barh(y, width, height=0.8, left=0, *, align='center', data=None, **kwargs)

该函数会为每个柱创建一个Rectangle对象（函数本身并不继承Rectangle类），**kwargs是一个可用Rectangle属性的列表，锚点参数xy是每个柱的左下角坐标，且旋转锚点设置也只能是'xy'（默认）。

绘制垂直/水平条形/柱状图。柱的位置由 x/y 和给定的align确定，尺寸由height和width给出。垂直/水平基线是 bottom/left（默认为 0）。许多参数可以接受单个值，应用于所有柱，或者接受一个序列值，每个柱一个值。

基础绘制¶

条形/柱状图常用于分类数据，例如条形底部/左侧的字符串标签。可以直接将字符串列表提供给 x/y 。 bar(['A', 'B', 'C'], [1, 2, 3])/barh(['A', 'B', 'C'], [1, 2, 3]) 通常比 bar(range(3), [1, 2, 3], tick_label=['A', 'B', 'C'])/barh(range(3), [1, 2, 3], tick_label=['A', 'B', 'C'])简短且方便。只要名称唯一，它们是等效的。显式的 tick_label 标记会按照给定的顺序绘制名称。然而，当分类 x/y 数据中有重复值时，这些值映射到相同的数值 x/y 坐标，因此相应的条形会重叠绘制。

height表示（每个）柱的**高度**。注意，如果bottom/y有单位（例如日期时间），则height应以bottom/y值的差值为单位（例如时间差）。
width表示（每个）柱的**宽度**。注意，如果 x/left有单位（例如日期时间），则width应以x/left值的差值为单位（例如时间差）。
bottom/left表示（每个）柱的**底边y坐标/左侧边x坐标**。注意，如果bottom/left有单位，则 y / x轴将获得适合这些单位的定位器和格式化器（例如日期或分类）。
align表示（每个）柱**与x/y**坐标的对齐方式。'center'将柱的基线中心对齐到 x/y位置。'edge'将柱的左侧/底部边缘与 x/y位置对齐。要将柱对齐到右侧/顶部边缘，应传入一个负的width/height并使用 align='edge' 。

如果设置log参数为True，则对y轴（bar）或x轴（barh）进行对数缩放。data参数同前。

# Basic vertical bars: every bar has its own baseline, height and width.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100, layout="constrained")

ax.bar(
    [0, 1, 2],               # x positions of the bars
    [1, 2, 3],               # bar heights
    width=[0.2, 0.4, 0.8],   # one width per bar
    bottom=[1, 2, 3],        # one y baseline per bar
    align='center',          # centre each bar on its x position
)
"""
<BarContainer object of 3 artists>
"""

plt.show()

# Same bars as the basic example, but the numeric x positions are given
# explicit tick labels.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100, layout="constrained")

ax.bar(
    [0, 1, 2],               # numeric x positions
    [1, 2, 3],               # bar heights
    width=[0.2, 0.4, 0.8],
    bottom=[1, 2, 3],
    align='center',
    tick_label=["A", "B", "C"],   # names shown on the x ticks
)
# Alternative spelling: pass x=["A", "B", "C"] directly and omit tick_label;
# all other arguments stay the same.
"""
<BarContainer object of 3 artists>
"""

plt.show()

# Basic horizontal bars: the roles of width/height are swapped relative to
# bar(), and the baseline is `left` instead of `bottom`.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100, layout="constrained")

ax.barh(
    [0, 1, 2],               # y positions of the bars
    [1, 2, 3],               # bar lengths along x
    height=[0.2, 0.4, 0.8],  # bar thickness along y
    left=[1, 2, 3],          # one x baseline per bar
    align='center',          # centre each bar on its y position
)
"""
<BarContainer object of 3 artists>
"""

plt.show()

# Horizontal bars at numeric y positions with explicit tick labels.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100, layout="constrained")

ax.barh(
    [0, 1, 2],               # numeric y positions
    [1, 2, 3],               # bar lengths along x
    height=[0.2, 0.4, 0.8],
    left=[1, 2, 3],
    align='center',
    tick_label=["A", "B", "C"],   # names shown on the y ticks
)
# Alternative spelling: pass y=["A", "B", "C"] directly and omit tick_label;
# all other arguments stay the same.
"""
<BarContainer object of 3 artists>
"""

plt.show()

柱外观¶

参考Rectangle类参数。

angle：绕锚点旋转角度。
fill：是否填充。
facecolor/color：（每个）柱的填充色。如果两者都给出，则 facecolor 优先。
linestyle：边缘线型。
linewidth：（每个）柱的边缘线宽度。如果为 0，则不绘制边缘。
hatch：（每个）柱的框内线型。
hatch_linewidth：框内线宽。
edgecolor：（每个）柱的边缘色。
alpha：透明度。

# Bar appearance: geometry first, then the Rectangle properties that bar()
# accepts (several of them take one value per bar).
fig, ax = plt.subplots(figsize=(8, 8), dpi=100, layout="constrained")

ax.bar(
    ["A", "B", "C"],         # categorical x positions
    [1, 2, 3],               # bar heights
    width=[0.2, 0.4, 0.8],
    bottom=[1, 2, 3],
    align='center',

    # Rectangle properties
    angle=45,                     # rotation about each bar's anchor point
    fill=True,
    facecolor=['r', 'g', 'b'],    # one fill colour per bar
    linestyle='--',
    linewidth=[2, 6, 10],         # one edge width per bar
    hatch=['/', '-', '+'],        # one hatch pattern per bar
    hatch_linewidth=2,
    edgecolor=['c', 'k', 'm'],    # one edge colour per bar
    alpha=0.5,
)
"""
<BarContainer object of 3 artists>
"""

plt.show()

图例标签¶

单个标签附加到结果 BarContainer 上，作为整个数据集的标签。如果提供标签列表，则必须与 x 的长度相同，即为每个柱添加标签。重复的标签不会去重，会导致重复的标签条目，因此最好在每个柱具有不同样式（例如，通过传递颜色列表）时使用。

# One legend entry for the whole bar series: a single string label is
# attached to the returned BarContainer.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100, layout="constrained")

ax.bar(
    ["A", "B", "C"],
    [1, 2, 3],
    width=0.2,
    bottom=0,
    align='center',
    label="bar",
)
"""
<BarContainer object of 3 artists>
"""
ax.legend()

plt.show()

# Grouped bars with per-bar legend labels: three groups (A, B, C) of three
# bars each, i.e. nine bars in total.
fig = plt.figure(figsize=(8, 8), dpi=100, layout="constrained")
ax = fig.add_subplot(1, 1, 1)

width = 0.2
inner = width + 0.08  # centre-to-centre spacing of neighbouring bars in a group

ax.bar(x=[i + n * inner for i in range(3) for n in range(3)],  # 9 positions
       align='center',
       bottom=0,
       height=range(1, 10),
       width=width,
       # Only the middle bar of each group carries the group name.
       tick_label=["", "A", "", "", "B", "", "", "C", ""],

       facecolor=['r', 'g', 'b'] * 3,
       # One label per bar. Labels that start with "_" are left out of the
       # legend, so each colour is listed exactly once.
       label=['r', 'g', 'b', '_r', '_g', '_b', '_r', '_g', '_b']
       )
"""
<BarContainer object of 9 artists>
"""
ax.legend()

plt.show()

误差线¶

**error_kw：**传递给errorbar函数的关键字参数字典。在此处定义的 ecolor 或 capsize 值优先于独立的关键字参数。

# Bars with both horizontal and vertical error bars; error_kw styles the
# error-bar lines and caps.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100, layout="constrained")

ax.bar(
    ["A", "B", "C"],
    [1, 2, 3],
    width=0.2,
    bottom=0,
    align='center',

    xerr=[0.1, 0.2, 0.3],    # one horizontal error per bar
    yerr=[0.2, 0.3, 0.4],    # one vertical error per bar
    error_kw={
        'ecolor': 'r',
        'elinewidth': 2,
        'capsize': 10,
        'capthick': 2,
    },
)
"""
<BarContainer object of 3 artists>
"""

plt.show()

柱标签（bar_label）¶

matplotlib.axes.Axes.bar_label — Matplotlib 3.10.1 documentation

matplotlib.pyplot.bar_label — Matplotlib 3.10.1 documentation

Bar chart with labels — Matplotlib 3.10.1 documentation

bar_label(container, labels=None, *, fmt='%g', label_type='edge', padding=0, **kwargs)

为给定BarContainer的每个柱添加标签。**kwargs任何剩余的关键字参数都将传递给 Axes.annotate 。由于标签会自动对齐到条形，因此不支持对齐参数的设置。

**container：**包含所有条形图和可选误差线的容器，通常由 bar 或 barh 返回。可以通过ax.containers来查看Axes中拥有的BarContainer列表。
**labels：**要显示的标签文本列表。如果未提供，则标签文本为使用 fmt格式化的数据值。
label_type：'edge'，标签放置在柱段的端点处，默认显示的值将是该端点的位置；'center'，标签放置在柱段的中心，默认显示的值将是该段的长度（适用于堆叠条形图）。
**fmt：**用于标签的%或{}格式化字符串，或一个带有值作为第一个参数的函数。当 fmt 是字符串且可以同时解释这两种格式时，%优先于{}。
**padding：**标签与柱末端或中心的距离（以点为单位）。

# bar_label demo: two stacked bar series on one Axes, each labelled in a
# different way.
fig = plt.figure(figsize=(8, 8), dpi=100, layout="constrained")
ax = fig.add_subplot(1, 1, 1)

# Lower series, starting from 0.
ax.bar(x=["A", "B", "C"],
       align='center',
       bottom=0,
       height=[1, 2, 3],
       width=0.2
       )
"""
<BarContainer object of 3 artists>
"""
# Upper series: its baselines equal the heights of the lower series, so it
# sits directly on top of it.
ax.bar(x=["A", "B", "C"],
       align='center',
       bottom=[1, 2, 3],
       height=[3, 2, 1],
       width=0.2
       )
"""
<BarContainer object of 3 artists>
"""

# Inspect the containers only after both bar() calls on THIS Axes; evaluated
# earlier, `ax` would still refer to the previous figure's Axes.
ax.containers
"""
[<BarContainer object of 3 artists>, <BarContainer object of 3 artists>]
"""

# Lower series: explicit label texts, placed at the centre of each bar.
ax.bar_label(ax.containers[0],
             labels=['I', 'II', 'III'],
             label_type='center',
             fmt='%g',  # documented default; not used because labels are given
             padding=0,

             # Annotate keyword arguments
             fontsize=20,
             color='r',
             )
# Upper series: no explicit labels, so the values are formatted with `fmt`
# and placed at the end of each bar.
ax.bar_label(ax.containers[1],
             labels=None,
             label_type='edge',
             fmt='{:.2f}',
             padding=0,

             # Annotate keyword arguments
             fontsize=20,
             color='g',
             )

plt.show()

极坐标柱状图（玫瑰图）¶

极坐标柱状图是一种特殊形式的柱状图，又称南丁格尔玫瑰图(Nightingale Rose Chart)，它以南丁格尔(Florence Nightingale)命名，她在1858年首次使用这种图表来展示战争期间士兵死亡原因的数据。它将数据绘制在极坐标系中，而不是传统的笛卡尔坐标系中。图中的柱形通过角度（对应极坐标的角度变量θ）分布在圆周上，高度表示某一变量的数值。它适合展示周期性、环状或者分类数据。

Seaborn：barplot¶

seaborn.barplot — seaborn 0.13.2 documentation

sns.barplot(data=None, *, x=None, y=None, hue=None, order=None, hue_order=None, estimator='mean', errorbar=('ci', 95), n_boot=1000, seed=None, units=None, weights=None, orient=None, color=None, palette=None, saturation=0.75, fill=True, hue_norm=None, width=0.8, dodge='auto', gap=0, log_scale=None, native_scale=False, formatter=None, legend='auto', capsize=0, err_kws=None, ax=None, **kwargs)

以矩形条形图显示点估计和误差。条形图表示数值变量的汇总或统计估计，每个矩形的高度表示估计值，并使用误差线表示该估计值的不确定性。条形图包括轴范围中的 0，当 0 是该变量有意义的取值时，这是一个很好的选择。

默认情况下，此函数将其中一个变量视为分类变量，并在相关轴上以序数位置（0，1，… n）绘制数据，可以通过设置 native_scale=True 来禁用此功能。

**kwargs其他参数传递给matplotlib.patches.Rectangle ，同上。

orient：'v' / 'x' 或 'h' / 'y'
**width：**分配给 orient 轴上每个元素的宽度。当 native_scale=True 时，该宽度是相对于原始尺度中两个值之间的最小距离而言的。
**dodge：**当使用色调映射时，元素是否应该沿 orient 轴变窄并移动以消除重叠。如果 "auto" ，当 orient轴变量与分类变量交叉时设置为 True ，否则设置为 False 。
gap：通过此因子在 orient 轴上缩小，以在 dodged 元素之间添加间隙。
**log_scale：**设置坐标轴缩放为对数缩放。单个值（bool值或数值）设置图中的所有数值轴。一对值（bool值或数值）独立设置每个轴。数值被解释为所需的底数（默认为 10）。当为 None 或 False 时，seaborn 将使用现有的 Axes缩放。
**native_scale：**当为 True 时，分类轴上的数值或日期时间值将保持其原始刻度，而不是转换为固定索引。
**formatter：**将分类数据转换为字符串的函数。影响分组和刻度标签。
**capsize：**误差线'帽'的宽度，相对于条形间距。
**err_kws：**用于误差线绘制的 matplotlib.lines.Line2D 参数。

# seaborn barplot fed with a pandas Series indexed by category name.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100, layout="constrained")

data = pd.Series({"A": 1, "B": 2, "C": 3})
"""
A    1
B    2
C    3
dtype: int64
"""

sns.barplot(data, width=0.2, ax=ax)
"""
<Axes: >
"""

plt.show()

# seaborn barplot fed with a DataFrame of three numeric columns (50 rows
# each); no x/y mapping is given.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100, layout="constrained")

# Each column runs linearly from 1 to its own upper bound.
data = pd.DataFrame({
    column: np.linspace(1, upper, 50)
    for column, upper in (("A", 100), ("B", 50), ("C", 200))
})
"""
             A     B           C
0     1.000000   1.0    1.000000
1     3.020408   2.0    5.061224
2     5.040816   3.0    9.122449
3     7.061224   4.0   13.183673
4     9.081633   5.0   17.244898
...
45   91.918367  46.0  183.755102
46   93.938776  47.0  187.816327
47   95.959184  48.0  191.877551
48   97.979592  49.0  195.938776
49  100.000000  50.0  200.000000
"""

sns.barplot(data, width=0.2, ax=ax)
"""
<Axes: >
"""

plt.show()

# seaborn barplot on the penguins table: mean body mass per species, split
# by sex, with every documented option spelled out explicitly.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100, layout="constrained")

sns.barplot(
    penguins,
    # Variable mapping and category handling
    x="species",
    y="body_mass_g",
    orient="x",
    order=['Gentoo', 'Chinstrap', 'Adelie'],
    formatter=lambda name: name + " penguins",

    # Hue mapping and fill colours
    hue='sex',
    hue_order=['FEMALE', 'MALE'],
    fill=True,
    palette={"FEMALE": "#facc87", "MALE": "#b1fa87"},
    saturation=0.75,

    # Point estimate and error bars
    estimator='mean',
    errorbar=('ci', 95),
    capsize=0.2,
    err_kws={
        "linestyle": "--",
        "linewidth": 2,
        "color": "r",
    },
    n_boot=1000,
    seed=0,
    units=None,
    weights=None,

    # Bar geometry
    width=0.5,
    dodge='auto',
    gap=0.3,

    # Axis scaling
    log_scale=False,
    native_scale=True,

    ax=ax,
)
"""
<Axes: xlabel='species', ylabel='body_mass_g'>
"""

plt.show()

Seaborn：countplot¶

seaborn.countplot — seaborn 0.13.2 documentation

sns.countplot(data=None, *, x=None, y=None, hue=None, order=None, hue_order=None, palette=None, saturation=0.75, fill=True, hue_norm=None, stat='count', width=0.8, dodge='auto', gap=0, log_scale=None, native_scale=False, formatter=None, legend='auto', ax=None, **kwargs)

使用条形图显示每个分类区间中观测值的数量。对于该函数，x和y参数不能同时传递，方向根据传递的是x还是y参数来决定。可以将计数图视为在分类变量上而不是在定量变量上的直方图。其基本 API 和选项与 barplot() 相同，因此可以比较嵌套变量之间的计数。

**stat：**统计量计算。当不是 'count' 时，柱状图的高度将归一化，使得它们在图中和为 100（'percent' ）或 1（'proportion', 'probability'）。

**kwargs其他参数传递给matplotlib.patches.Rectangle ，同上。

# seaborn countplot on the penguins table: number of rows per species,
# split by sex.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100, layout="constrained")

sns.countplot(
    penguins,
    # Variable mapping: only one of x / y is passed; using y="species"
    # instead would switch the orientation.
    x="species",
    order=['Gentoo', 'Chinstrap', 'Adelie'],
    formatter=lambda name: name + " penguins",
    stat="count",

    # Hue mapping and fill colours
    hue='sex',
    hue_order=['FEMALE', 'MALE'],
    fill=True,
    palette={"FEMALE": "#facc87", "MALE": "#b1fa87"},
    saturation=0.75,

    # Bar geometry
    width=0.5,
    dodge='auto',
    gap=0.3,

    # Axis scaling
    log_scale=False,
    native_scale=True,

    ax=ax,
)
"""
<Axes: xlabel='species', ylabel='count'>
"""

plt.show()

Seaborn：pointplot¶

seaborn.pointplot — seaborn 0.13.2 documentation

sns.pointplot(data=None, *, x=None, y=None, hue=None, order=None, hue_order=None, estimator='mean', errorbar=('ci', 95), n_boot=1000, seed=None, units=None, weights=None, color=None, palette=None, hue_norm=None, markers=<default>, linestyles=<default>, dodge=False, log_scale=None, native_scale=False, orient=None, capsize=0, formatter=None, legend='auto', err_kws=None, ax=None, **kwargs)

使用线条和标记显示点估计和误差。点图通过点的位置表示数值变量的集中趋势估计，并使用误差线提供对该估计不确定性的指示。点图比条形图更有助于集中比较一个或多个分类变量的不同水平。它们特别擅长展示交互作用：一个分类变量的水平与另一个分类变量的水平之间的关系如何变化。连接来自同一水平 hue 的每个点的线条允许通过斜率差异来判断交互作用，这比比较几个点组或条形的高度更容易为眼睛所接受。

**markers：**为每个hue水平使用的标记。
**linestyles：**为每个hue水平使用的线型。
**dodge：**沿分类轴将 hue 变量各水平的点相互错开的距离。设置为True将应用一个较小的默认值。

**kwargs其他参数传递给matplotlib.lines.Line2D。

# seaborn pointplot on the penguins table: mean body mass per species,
# one connected line per sex.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100, layout="constrained")

sns.pointplot(
    penguins,
    # Variable mapping and category handling
    x="species",
    y="body_mass_g",
    orient="x",
    order=['Gentoo', 'Chinstrap', 'Adelie'],
    formatter=lambda name: name + " penguins",

    # Hue mapping and colours
    hue='sex',
    hue_order=['FEMALE', 'MALE'],
    palette={"FEMALE": "#facc87", "MALE": "#b1fa87"},

    # Point estimate and error bars
    estimator='mean',
    errorbar=('ci', 95),
    capsize=0.2,
    err_kws={
        "linestyle": "--",
        "linewidth": 2,
        "color": "r",
    },
    n_boot=1000,
    seed=0,
    units=None,
    weights=None,

    # Markers and connecting lines (one entry per hue level)
    dodge=False,
    markers=['o', '*'],
    markersize=15,
    linestyles=['--', '-'],
    linewidth=2,

    # Axis scaling
    log_scale=False,
    native_scale=True,

    ax=ax,
)
"""
<Axes: xlabel='species', ylabel='body_mass_g'>
"""

plt.show()