zephyr-backend/balloon/plot_year.py

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import pandas as pd
import seaborn as sns
from windrose import WindroseAxes


def plot_year(data: pd.DataFrame, path: str, lat: float, g: float):
    if data.size == 0:
        return False

    data.loc[:, "date"] = data["file_name"].str[:15]
    filtered_df = data[["date"] + [col for col in data.columns if col != "file_name" and col != "date"]]
    filtered_df.reset_index(drop=True, inplace=True)

    filtered_df = filtered_df.drop_duplicates(subset="date", keep="last")  # 使用 drop_duplicates 函数，保留每组重复中的最后一个记录
    filtered_df.reset_index(drop=True, inplace=True)

    filtered_df = filtered_df[filtered_df["w_f"] < 10]  # 筛选 w_f 值小于 10 的数据

    # todo:1-删除不合理的数据
    # 1.先剔除明显异常的值
    for column in filtered_df.columns[1:]:  # 不考虑第一列日期列
        filtered_df = filtered_df[(filtered_df[column] >= -9999) & (filtered_df[column] <= 9999)]  # 460

    # 2.再用四分位数法，适合所有数据集
    def remove_outliers_iqr(df):
        for column in df.columns[9:]:  # 从第10列开始
            Q1 = df[column].quantile(0.25)
            Q3 = df[column].quantile(0.75)
            IQR = Q3 - Q1
            lower_bound = Q1 - 5 * IQR
            upper_bound = Q3 + 5 * IQR

            # 过滤掉异常值
            df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
        return df

    filtered_df = remove_outliers_iqr(filtered_df)  #  408

    # 画图

    fig = plt.figure(figsize=(36, 20))
    gs = gridspec.GridSpec(4, 12)

    ax1 = fig.add_subplot(gs[0, 0:3])
    ax2 = fig.add_subplot(gs[0, 3:6])
    ax3 = fig.add_subplot(gs[0, 6:9])
    ax4 = fig.add_subplot(gs[0, 9:12])

    ax5_1 = fig.add_subplot(gs[1, 0:2])
    ax5_2 = fig.add_subplot(gs[1, 2:4])
    ax5_3 = fig.add_subplot(gs[1, 4:6])
    ax6_1 = fig.add_subplot(gs[1, 6:8])
    ax6_2 = fig.add_subplot(gs[1, 8:10])
    ax6_3 = fig.add_subplot(gs[1, 10:12])

    ax7_1 = fig.add_subplot(gs[2, 0:2])
    ax7_2 = fig.add_subplot(gs[2, 2:4])
    ax8 = fig.add_subplot(gs[2, 4:8])
    ax9 = fig.add_subplot(gs[2, 8:12])

    ax10 = []
    for i in range(0, 10, 3):
        ax10.append(fig.add_subplot(gs[3, i : i + 3], projection="windrose"))

    sns.set_theme(style="whitegrid", font="SimHei")  # 设置绘图样式为白色背景和网格线

    # 1、w/f比值
    # 计算bins的边界
    min_val = 1
    max_val = 10
    bin_width = 0.5
    # 创建bins的边界
    bins = np.arange(min_val, max_val + bin_width, bin_width)

    sns.histplot(filtered_df["w_f"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax1)  # 加上stat='percent'表示算的是频率
    # 设置x轴范围
    ax1.set_xlim(min_val, max_val)
    # 添加标题和标签
    ax1.set_title("w/f值统计结果", fontsize=24)
    ax1.set_xlabel("w/f")
    ax1.set_ylabel("Occurrence(%)")

    # 2、周期
    # 计算bins的边界
    min_val = 1
    max_val = 10
    bin_width = 0.5
    # 创建bins的边界
    bins = np.arange(min_val, max_val + bin_width, bin_width)

    min_val = np.floor(filtered_df["zhou_qi"].min())  # 向下取整
    max_val = np.ceil(filtered_df["zhou_qi"].max())  # 向上取整
    bin_width = 1
    bins = np.arange(min_val, max_val + bin_width, bin_width)

    sns.histplot(filtered_df["zhou_qi"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax2)  # 加上stat='percent'表示算的是频率
    # 设置x轴范围
    ax2.set_xlim(min_val, max_val)
    # 添加标题和标签
    ax2.set_title("周期统计结果", fontsize=24)
    ax2.set_xlabel("h")
    ax2.set_ylabel("Occurrence(%)")

    # 3、垂直波长
    # 创建bins的边界
    min_val = np.floor(filtered_df["ver_wave_len"].min())  # 向下取整
    max_val = np.ceil(filtered_df["ver_wave_len"].max())  # 向上取整
    bin_width = 0.5
    bins = np.arange(min_val, max_val + bin_width, bin_width)

    sns.histplot(filtered_df["ver_wave_len"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax3)  # 加上stat='percent'表示算的是频率
    # 设置x轴范围
    ax3.set_xlim(min_val, max_val)
    # 添加标题和标签
    ax3.set_title("垂直波长分布", fontsize=24)
    ax3.set_xlabel("Vertical wavelength(km)")
    ax3.set_ylabel("Occurrence(%)")

    # 4、水平波长
    # 创建bins的边界
    min_val = np.floor(filtered_df["hori_wave_len"].min())  # 向下取整
    max_val = np.ceil(filtered_df["hori_wave_len"].max())  # 向上取整
    bin_width = 100
    bins = np.arange(min_val, max_val + bin_width, bin_width)

    sns.histplot(filtered_df["hori_wave_len"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax4)  # 加上stat='percent'表示算的是频率
    # 设置x轴范围
    ax4.set_xlim(min_val, max_val)
    # 添加标题和标签
    ax4.set_title("水平波长分布", fontsize=24)
    ax4.set_xlabel("Horizontal wavelength(km)")
    ax4.set_ylabel("Occurrence(%)")

    # 5、本征相速度

    # 纬向本征相速度
    # 计算bins的边界
    min_val = np.floor(filtered_df["c_x"].min())  # 向下取整
    max_val = np.ceil(filtered_df["c_x"].max())  # 向上取整
    bin_width = 10
    bins = np.arange(min_val, max_val + bin_width, bin_width)
    # 创建纬向直方图
    sns.histplot(filtered_df["c_x"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax5_1)
    ax5_1.set_xlim(min_val, max_val)
    ax5_1.set_title("纬向本征相速度", fontsize=24)
    ax5_1.set_xlabel("Zonal phase speed (m/s)")
    ax5_1.set_ylabel("Occurrence (%)")

    # 经向本征相速度
    # 计算bins的边界
    min_val = np.floor(filtered_df["c_y"].min())  # 向下取整
    max_val = np.ceil(filtered_df["c_y"].max())  # 向上取整
    bin_width = 10
    bins = np.arange(min_val, max_val + bin_width, bin_width)
    # 创建经向直方图
    sns.histplot(filtered_df["c_y"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax5_2)
    ax5_2.set_xlim(min_val, max_val)
    ax5_2.set_title("经向本征相速度", fontsize=24)
    ax5_2.set_xlabel("Meridional phase speed (m/s)")
    ax5_2.set_ylabel("Occurrence (%)")

    # 垂直本征相速度
    # 计算bins的边界
    min_val = filtered_df["c_z"].min()  # -1.148
    max_val = filtered_df["c_z"].max()  # 0.624
    bin_width = 0.1
    bins = np.arange(min_val, max_val + bin_width, bin_width)
    # 创建垂直直方图
    sns.histplot(filtered_df["c_z"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax5_3)
    ax5_3.set_xlim(min_val, max_val)
    ax5_3.set_title("垂直本征相速度", fontsize=24)
    ax5_3.set_xlabel("Vertical phase speed (m/s)")
    ax5_3.set_ylabel("Occurrence (%)")

    # 6、扰动振幅

    # 纬向扰动振幅
    # 计算bins的边界
    min_val = np.floor(filtered_df["u1"].min())  # 向下取整
    max_val = np.ceil(filtered_df["u1"].max())  # 向上取整
    bin_width = 0.5
    bins = np.arange(min_val, max_val + bin_width, bin_width)
    # 创建纬向直方图
    sns.histplot(filtered_df["u1"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax6_1)
    ax6_1.set_xlim(min_val, max_val)
    ax6_1.set_title(" ", fontsize=24)
    ax6_1.set_xlabel("Zonal wind amplitude (m/s)")
    ax6_1.set_ylabel("Occurrence (%)")

    # 经向扰动振幅
    # 计算bins的边界
    min_val = np.floor(filtered_df["v1"].min())  # 向下取整
    max_val = np.ceil(filtered_df["v1"].max())  # 向上取整
    bin_width = 0.5
    bins = np.arange(min_val, max_val + bin_width, bin_width)
    # 创建经向直方图
    sns.histplot(filtered_df["v1"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax6_2)
    ax6_2.set_xlim(min_val, max_val)
    ax6_2.set_title("扰动振幅统计结果", fontsize=24)
    ax6_2.set_xlabel("Meridional wind amplitude (m/s)")
    ax6_2.set_ylabel("Occurrence (%)")

    # 垂直扰动振幅
    # 计算bins的边界
    min_val = np.floor(filtered_df["T1"].min())  # 向下取整
    max_val = np.ceil(filtered_df["T1"].max())  # 向上取整
    bin_width = 0.5
    bins = np.arange(min_val, max_val + bin_width, bin_width)
    # 创建垂直直方图
    sns.histplot(filtered_df["T1"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax6_3)
    ax6_3.set_xlim(min_val, max_val)
    ax6_3.set_title(" ", fontsize=24)
    ax6_3.set_xlabel("Temperature amplitude (K)")
    ax6_3.set_ylabel("Occurrence (%)")

    # 7、动量通量
    # 挑选出向上传的重力波
    filtered_df1 = filtered_df[filtered_df["a"] == 1]

    # 绘制第一个子图
    # 计算bins的边界
    min_val = np.floor(filtered_df1["MFu"].min())  # 向下取整
    max_val = np.ceil(filtered_df1["MFu"].max())  # 向上取整
    bin_width = 0.1
    bins = np.arange(min_val, max_val + bin_width, bin_width)

    sns.histplot(filtered_df1["MFu"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax7_1)
    ax7_1.set_xlim(min_val, max_val)  # 设置x轴范围
    ax7_1.set_title("纬向动量通量统计结果", fontsize=24)  # 添加标题
    ax7_1.set_xlabel("Zonal momentum flux(mPa)")  # x轴标签
    ax7_1.set_ylabel("Occurrence(%)")  # y轴标签

    # 绘制第二个子图
    # 计算bins的边界
    min_val = np.floor(filtered_df1["MFv"].min())  # 向下取整
    max_val = np.ceil(filtered_df1["MFv"].max())  # 向上取整
    bin_width = 0.1
    bins = np.arange(min_val, max_val + bin_width, bin_width)

    sns.histplot(filtered_df1["MFv"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax7_2)
    ax7_2.set_xlim(min_val, max_val)  # 设置x轴范围
    ax7_2.set_title("经向动量通量统计结果", fontsize=24)  # 添加标题
    ax7_2.set_xlabel("Meridional momentum flux(mPa)")  # x轴标签
    ax7_2.set_ylabel("Occurrence(%)")  # y轴标签

    # 10、水平传播方向

    filtered_df["date1"] = filtered_df["date"].str.split("T").str[0]  # 再加一列，只保留日期部分
    filtered_df["date1"] = pd.to_datetime(filtered_df["date1"], format="%Y%m%d")  # 确保 'date1' 列是日期格式

    # 添加季节列
    def get_season(date):
        month = date.month
        if month in [12, 1, 2]:
            return "Winter"
        elif month in [3, 4, 5]:
            return "Spring"
        elif month in [6, 7, 8]:
            return "Summer"
        else:
            return "Fall"

    filtered_df["season"] = filtered_df["date1"].apply(get_season)  # 添加季节列
    seasons = ["Winter", "Spring", "Summer", "Fall"]

    for ax, season in zip(ax10, seasons):
        season_data = filtered_df[filtered_df["season"] == season]
        windrose = WindroseAxes.from_ax(ax)
        ax.set_title(season, fontsize=18)
        windrose.bar(season_data["b"], np.ones_like(season_data["b"]), normed=False)  # normed=True表示占每个季节的占比

    # # 添加总标题
    # fig.suptitle("水平传播方向在各个季节的分布变化", fontsize=16, fontweight="bold")

    # 8、垂直传播方向
    # 设置 日期'date1' 列为索引
    filtered_df.set_index("date1", inplace=True)

    # 按月份分组并计算百分比
    monthly_stats_df = (
        filtered_df.groupby([filtered_df.index.month, filtered_df.index.year])
        .apply(lambda x: pd.Series({"Upload (%)": (x["a"] == 1).mean() * 100, "Downward (%)": (x["a"] == -1).mean() * 100}))
        .reset_index(level=1, drop=True)
    )

    # 按月份汇总这些年的数据
    monthly_avg_stats_df = monthly_stats_df.groupby(level=0).mean()
    # 确保索引是 numpy 数组
    dates = monthly_avg_stats_df.index.to_numpy()

    # 绘制折线图
    ax8.plot(dates, monthly_avg_stats_df["Upload (%)"].to_numpy(), marker="o", label="Up (%)")
    ax8.plot(dates, monthly_avg_stats_df["Downward (%)"].to_numpy(), marker="o", label="Down (%)")

    # 设置月份标签
    ax8.set_xticks(
        ticks=np.arange(1, 13), labels=["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"], rotation=0
    )  # 不倾斜，rotation=0表示倾斜45°
    # 添加图例、标题和坐标轴标签
    ax8.legend()
    ax8.set_title("每月上传/下传重力波占比", fontsize=24)
    ax8.set_xlabel("Month")
    ax8.set_ylabel("Percentage (%)")

    # 9、动能和势能
    filtered_df.reset_index(inplace=True)  # 取消索引并恢复为默认整数索引
    # 提取年月
    filtered_df["year_month"] = filtered_df["date1"].dt.to_period("M")
    # 计算每个月的动能和势能的平均值
    monthly_avg = filtered_df.groupby("year_month")[["Ek", "E_p"]].mean()

    # 创建完整的月份范围（因为有的月份没数据）
    full_range = pd.period_range(start=monthly_avg.index.min(), end=monthly_avg.index.max(), freq="M")

    # 创建一个新的 DataFrame 使用完整的年份月份范围
    full_monthly_avg = pd.DataFrame(index=full_range)
    # 将原始数据合并到新的 DataFrame 中
    full_monthly_avg = full_monthly_avg.join(monthly_avg)

    # 确保 'Ek' 和 'E_p' 列为数值型
    full_monthly_avg["Ek"] = pd.to_numeric(full_monthly_avg["Ek"], errors="coerce")
    full_monthly_avg["E_p"] = pd.to_numeric(full_monthly_avg["E_p"], errors="coerce")

    # 只显示每年6月、12月，简化图形

    # 绘制 Ek、E_p
    ax9.plot(full_monthly_avg.index.values.astype(str), full_monthly_avg["Ek"].values, marker="o", linestyle="-", color="r", label="动能月平均值")
    ax9.plot(full_monthly_avg.index.values.astype(str), full_monthly_avg["E_p"].values, marker="o", linestyle="-", color="b", label="势能月平均值")

    # 添加标题和标签
    ax9.set_title("动能和势能分布情况", fontsize=24)
    ax9.set_xlabel("Month", fontsize=14)
    ax9.set_ylabel("Wave energy (J/kg)", fontsize=14)

    # 设定横轴标签
    months = full_monthly_avg.index.values.astype(str)
    # 获取所有年份的6月和12月的索引
    june_indices = [i for i, date in enumerate(months) if date.endswith("-06")]
    december_indices = [i for i, date in enumerate(months) if date.endswith("-12")]
    selected_indices = june_indices + december_indices

    # 只显示选定的标签
    ax9.set_xticks(ticks=selected_indices, labels=[months[i] for i in selected_indices], rotation=45)

    # 添加网格和图例
    # plt.grid()
    ax9.legend()  # 显示图例

    plt.rcParams["font.sans-serif"] = ["SimHei"]  # 显示中文
    plt.rcParams["axes.unicode_minus"] = False  # 正常显示负号

    plt.tight_layout()
    plt.savefig(path)
    plt.close()

    return True