import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import pandas as pd
import seaborn as sns
from windrose import WindroseAxes

_MONTH_LABELS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

_SEASONS = ["Winter", "Spring", "Summer", "Fall"]

_SEASON_BY_MONTH = {
    12: "Winter", 1: "Winter", 2: "Winter",
    3: "Spring", 4: "Spring", 5: "Spring",
    6: "Summer", 7: "Summer", 8: "Summer",
    9: "Fall", 10: "Fall", 11: "Fall",
}

# One entry per histogram panel:
# (column, bin width, title, x label, y label, extra options for _percent_hist).
_HISTOGRAMS = (
    # w/f ratio: fixed 1..10 axis instead of data-driven limits.
    ("w_f", 0.5, "w/f值统计结果", "w/f", "Occurrence(%)", {"limits": (1, 10)}),
    # Period.
    ("zhou_qi", 1, "周期统计结果", "h", "Occurrence(%)", {}),
    # Vertical / horizontal wavelength.
    ("ver_wave_len", 0.5, "垂直波长分布", "Vertical wavelength(km)", "Occurrence(%)", {}),
    ("hori_wave_len", 100, "水平波长分布", "Horizontal wavelength(km)", "Occurrence(%)", {}),
    # Intrinsic phase speed: zonal, meridional, vertical.
    ("c_x", 10, "纬向本征相速度", "Zonal phase speed (m/s)", "Occurrence (%)", {}),
    ("c_y", 10, "经向本征相速度", "Meridional phase speed (m/s)", "Occurrence (%)", {}),
    # The vertical speed uses the raw min/max (no rounding to whole numbers).
    ("c_z", 0.1, "垂直本征相速度", "Vertical phase speed (m/s)", "Occurrence (%)", {"snap": False}),
    # Perturbation amplitudes; only the middle panel carries the shared title.
    ("u1", 0.5, " ", "Zonal wind amplitude (m/s)", "Occurrence (%)", {}),
    ("v1", 0.5, "扰动振幅统计结果", "Meridional wind amplitude (m/s)", "Occurrence (%)", {}),
    ("T1", 0.5, " ", "Temperature amplitude (K)", "Occurrence (%)", {}),
)

# Momentum-flux panels, drawn from the upward-propagating subset (a == 1) only.
_UPWARD_HISTOGRAMS = (
    ("MFu", 0.1, "纬向动量通量统计结果", "Zonal momentum flux(mPa)", "Occurrence(%)", {}),
    ("MFv", 0.1, "经向动量通量统计结果", "Meridional momentum flux(mPa)", "Occurrence(%)", {}),
)


def _remove_outliers_iqr(df: pd.DataFrame, first_column: int = 9, whisker: float = 5.0) -> pd.DataFrame:
    """Drop rows lying outside ``[Q1 - whisker*IQR, Q3 + whisker*IQR]``.

    The filter is applied column by column, starting at positional column
    ``first_column``; the quartiles of each column are computed on the rows
    that survived the previous columns, so the order of columns matters.
    """
    for column in df.columns[first_column:]:
        q1 = df[column].quantile(0.25)
        q3 = df[column].quantile(0.75)
        spread = q3 - q1
        # ``between`` is inclusive on both ends by default.
        df = df[df[column].between(q1 - whisker * spread, q3 + whisker * spread)]
    return df


def _clean_records(data: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned, de-duplicated copy of *data*; *data* itself is not modified.

    The result has a leading ``date`` column (first 15 characters of
    ``file_name``) followed by every other input column except ``file_name``.
    """
    records = data.drop(columns=["file_name", "date"], errors="ignore")
    records.insert(0, "date", data["file_name"].str[:15])

    # Keep only the last record for each timestamp.
    records = records.drop_duplicates(subset="date", keep="last").reset_index(drop=True)
    # Keep only records whose w/f ratio is below 10.
    records = records[records["w_f"] < 10]

    # Step 1: discard rows holding an obviously invalid value in any column
    # other than the date (rows with NaN fail the comparison and are dropped too).
    measurements = records.iloc[:, 1:]
    in_range = ((measurements >= -9999) & (measurements <= 9999)).all(axis=1)

    # Step 2: inter-quartile-range filter on the tenth column onward.
    records = _remove_outliers_iqr(records[in_range])

    # Explicit copy so that columns can be added later without writing to a slice.
    return records.copy()


def _percent_hist(ax, values, bin_width, title, xlabel, ylabel, limits=None, snap=True):
    """Draw a percentage histogram of *values* on *ax* and label the axes.

    Args:
        ax: Target matplotlib axes.
        values: Series to histogram.
        bin_width: Width of each bin.
        title, xlabel, ylabel: Texts for the panel.
        limits: Optional ``(low, high)`` x-range; when omitted the range is
            taken from the data.
        snap: When the range is taken from the data, round it outward to
            whole numbers (floor of the minimum, ceil of the maximum).
    """
    if not values.empty:
        if limits is not None:
            low, high = limits
        else:
            low, high = values.min(), values.max()
            if snap:
                low, high = np.floor(low), np.ceil(high)
        bins = np.arange(low, high + bin_width, bin_width)
        # stat="percent" plots relative frequency instead of raw counts.
        sns.histplot(values, bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax)
        ax.set_xlim(low, high)
    # With no data the bin edges would be NaN; the panel is left empty but labelled.
    ax.set_title(title, fontsize=24)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)


def _draw_histograms(wave_axes, flux_axes, records):
    """Fill the ten wave-parameter panels and the two momentum-flux panels."""
    for ax, (column, bin_width, title, xlabel, ylabel, options) in zip(wave_axes, _HISTOGRAMS):
        _percent_hist(ax, records[column], bin_width, title, xlabel, ylabel, **options)

    # Momentum flux is shown for upward-propagating waves only.
    upward = records[records["a"] == 1]
    for ax, (column, bin_width, title, xlabel, ylabel, options) in zip(flux_axes, _UPWARD_HISTOGRAMS):
        _percent_hist(ax, upward[column], bin_width, title, xlabel, ylabel, **options)


def _draw_direction_roses(rose_axes, records):
    """Draw one rose of the ``b`` column (horizontal propagation direction) per season."""
    season = records["date1"].dt.month.map(_SEASON_BY_MONTH)
    for ax, name in zip(rose_axes, _SEASONS):
        directions = records.loc[season == name, "b"]
        rose = WindroseAxes.from_ax(ax)
        ax.set_title(name, fontsize=18)
        if directions.empty:
            # Nothing to bin for this season; keep the titled, empty rose.
            continue
        # Every event has weight 1; normed=False plots counts rather than
        # the share within the season.
        rose.bar(directions, np.ones_like(directions), normed=False)


def _draw_vertical_direction(ax, records):
    """Plot, per calendar month, the share of records with ``a == 1`` and ``a == -1``.

    The share is first computed for every (month, year) pair and then
    averaged over the years, so each year carries the same weight.
    """
    month = records["date1"].dt.month.to_numpy()
    year = records["date1"].dt.year.to_numpy()
    flags = pd.DataFrame(
        {
            "up": (records["a"] == 1).to_numpy().astype(float),
            "down": (records["a"] == -1).to_numpy().astype(float),
        }
    )
    share_by_month_and_year = flags.groupby([month, year]).mean() * 100
    share_by_month = share_by_month_and_year.groupby(level=0).mean()

    months = share_by_month.index.to_numpy()
    ax.plot(months, share_by_month["up"].to_numpy(), marker="o", label="Up (%)")
    ax.plot(months, share_by_month["down"].to_numpy(), marker="o", label="Down (%)")

    # rotation=0 keeps the month labels horizontal.
    ax.set_xticks(ticks=np.arange(1, 13), labels=_MONTH_LABELS, rotation=0)
    ax.legend()
    ax.set_title("每月上传/下传重力波占比", fontsize=24)
    ax.set_xlabel("Month")
    ax.set_ylabel("Percentage (%)")


def _draw_energy(ax, records):
    """Plot the monthly mean of the ``Ek`` and ``E_p`` columns over the whole time span."""
    year_month = records["date1"].dt.to_period("M").rename("year_month")
    monthly_mean = records.groupby(year_month)[["Ek", "E_p"]].mean()

    # Months without data are inserted as NaN so that the time axis has no holes.
    full_range = pd.period_range(start=monthly_mean.index.min(), end=monthly_mean.index.max(), freq="M")
    monthly_mean = monthly_mean.reindex(full_range)

    labels = [str(period) for period in monthly_mean.index]
    ax.plot(labels, monthly_mean["Ek"].to_numpy(), marker="o", linestyle="-", color="r", label="动能月平均值")
    ax.plot(labels, monthly_mean["E_p"].to_numpy(), marker="o", linestyle="-", color="b", label="势能月平均值")

    ax.set_title("动能和势能分布情况", fontsize=24)
    ax.set_xlabel("Month", fontsize=14)
    ax.set_ylabel("Wave energy (J/kg)", fontsize=14)

    # Only June and December of each year are labelled to keep the axis readable.
    tick_positions = [i for i, label in enumerate(labels) if label.endswith(("-06", "-12"))]
    ax.set_xticks(ticks=tick_positions, labels=[labels[i] for i in tick_positions], rotation=45)
    ax.legend()


def plot_year(data: pd.DataFrame, path: str, lat: float, g: float) -> bool:
    """Render the multi-panel statistics figure for *data* and save it to *path*.

    The figure is a 4 x 12 grid holding: histograms of the wave parameters
    (w/f ratio, period, wavelengths, phase speeds, amplitudes, momentum
    fluxes), the monthly share of upward/downward records, the monthly mean
    energies, and one propagation-direction rose per season.

    Args:
        data: Input records. Must contain ``file_name`` (its first 15
            characters are used as the timestamp; the part before ``"T"`` is
            parsed as ``%Y%m%d``) and the plotted columns ``w_f``,
            ``zhou_qi``, ``ver_wave_len``, ``hori_wave_len``, ``c_x``,
            ``c_y``, ``c_z``, ``u1``, ``v1``, ``T1``, ``a``, ``b``, ``MFu``,
            ``MFv``, ``Ek`` and ``E_p``. The frame is not modified.
        path: Destination passed to ``Figure.savefig``.
        lat: Not used by this function; kept for interface compatibility.
        g: Not used by this function; kept for interface compatibility.

    Returns:
        ``True`` when the figure was written, ``False`` when *data* is empty
        or no record survives the cleaning step.
    """
    if data.empty:
        return False

    records = _clean_records(data)
    if records.empty:
        # Every row was rejected by the filters; there is nothing to plot.
        return False

    # Calendar date of each record (time of day dropped).
    records["date1"] = pd.to_datetime(records["date"].str.split("T").str[0], format="%Y%m%d")

    # The theme must be active before the axes are created, otherwise the
    # first figure of a process is rendered without it.
    sns.set_theme(style="whitegrid", font="SimHei")
    plt.rcParams["font.sans-serif"] = ["SimHei"]  # font able to render the Chinese titles
    plt.rcParams["axes.unicode_minus"] = False  # render minus signs correctly with that font

    fig = plt.figure(figsize=(36, 20))
    try:
        grid = gridspec.GridSpec(4, 12)
        top_row = [fig.add_subplot(grid[0, col : col + 3]) for col in range(0, 12, 3)]
        second_row = [fig.add_subplot(grid[1, col : col + 2]) for col in range(0, 12, 2)]
        flux_axes = [fig.add_subplot(grid[2, 0:2]), fig.add_subplot(grid[2, 2:4])]
        direction_ax = fig.add_subplot(grid[2, 4:8])
        energy_ax = fig.add_subplot(grid[2, 8:12])
        rose_axes = [fig.add_subplot(grid[3, col : col + 3], projection="windrose") for col in range(0, 12, 3)]

        _draw_histograms(top_row + second_row, flux_axes, records)
        _draw_direction_roses(rose_axes, records)
        _draw_vertical_direction(direction_ax, records)
        _draw_energy(energy_ax, records)

        fig.tight_layout()
        fig.savefig(path)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
    return True