zephyr-backend/balloon/plot_year.py
2025-01-15 14:48:48 +08:00

360 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import pandas as pd
import seaborn as sns
from windrose import WindroseAxes
def plot_year(data: pd.DataFrame, path: str, lat: float, g: float):
if data.size == 0:
return False
data.loc[:, "date"] = data["file_name"].str[:15]
filtered_df = data[["date"] + [col for col in data.columns if col != "file_name" and col != "date"]]
filtered_df.reset_index(drop=True, inplace=True)
filtered_df = filtered_df.drop_duplicates(subset="date", keep="last") # 使用 drop_duplicates 函数,保留每组重复中的最后一个记录
filtered_df.reset_index(drop=True, inplace=True)
filtered_df = filtered_df[filtered_df["w_f"] < 10] # 筛选 w_f 值小于 10 的数据
# todo:1-删除不合理的数据
# 1.先剔除明显异常的值
for column in filtered_df.columns[1:]: # 不考虑第一列日期列
filtered_df = filtered_df[(filtered_df[column] >= -9999) & (filtered_df[column] <= 9999)] # 460
# 2.再用四分位数法,适合所有数据集
def remove_outliers_iqr(df):
for column in df.columns[9:]: # 从第10列开始
Q1 = df[column].quantile(0.25)
Q3 = df[column].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 5 * IQR
upper_bound = Q3 + 5 * IQR
# 过滤掉异常值
df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
return df
filtered_df = remove_outliers_iqr(filtered_df) # 408
# 画图
fig = plt.figure(figsize=(36, 20))
gs = gridspec.GridSpec(4, 12)
ax1 = fig.add_subplot(gs[0, 0:3])
ax2 = fig.add_subplot(gs[0, 3:6])
ax3 = fig.add_subplot(gs[0, 6:9])
ax4 = fig.add_subplot(gs[0, 9:12])
ax5_1 = fig.add_subplot(gs[1, 0:2])
ax5_2 = fig.add_subplot(gs[1, 2:4])
ax5_3 = fig.add_subplot(gs[1, 4:6])
ax6_1 = fig.add_subplot(gs[1, 6:8])
ax6_2 = fig.add_subplot(gs[1, 8:10])
ax6_3 = fig.add_subplot(gs[1, 10:12])
ax7_1 = fig.add_subplot(gs[2, 0:2])
ax7_2 = fig.add_subplot(gs[2, 2:4])
ax8 = fig.add_subplot(gs[2, 4:8])
ax9 = fig.add_subplot(gs[2, 8:12])
ax10 = []
for i in range(0, 10, 3):
ax10.append(fig.add_subplot(gs[3, i : i + 3], projection="windrose"))
sns.set_theme(style="whitegrid", font="SimHei") # 设置绘图样式为白色背景和网格线
# 1、w/f比值
# 计算bins的边界
min_val = 1
max_val = 10
bin_width = 0.5
# 创建bins的边界
bins = np.arange(min_val, max_val + bin_width, bin_width)
sns.histplot(filtered_df["w_f"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax1) # 加上stat='percent'表示算的是频率
# 设置x轴范围
ax1.set_xlim(min_val, max_val)
# 添加标题和标签
ax1.set_title("w/f值统计结果", fontsize=24)
ax1.set_xlabel("w/f")
ax1.set_ylabel("Occurrence(%)")
# 2、周期
# 计算bins的边界
min_val = 1
max_val = 10
bin_width = 0.5
# 创建bins的边界
bins = np.arange(min_val, max_val + bin_width, bin_width)
min_val = np.floor(filtered_df["zhou_qi"].min()) # 向下取整
max_val = np.ceil(filtered_df["zhou_qi"].max()) # 向上取整
bin_width = 1
bins = np.arange(min_val, max_val + bin_width, bin_width)
sns.histplot(filtered_df["zhou_qi"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax2) # 加上stat='percent'表示算的是频率
# 设置x轴范围
ax2.set_xlim(min_val, max_val)
# 添加标题和标签
ax2.set_title("周期统计结果", fontsize=24)
ax2.set_xlabel("h")
ax2.set_ylabel("Occurrence(%)")
# 3、垂直波长
# 创建bins的边界
min_val = np.floor(filtered_df["ver_wave_len"].min()) # 向下取整
max_val = np.ceil(filtered_df["ver_wave_len"].max()) # 向上取整
bin_width = 0.5
bins = np.arange(min_val, max_val + bin_width, bin_width)
sns.histplot(filtered_df["ver_wave_len"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax3) # 加上stat='percent'表示算的是频率
# 设置x轴范围
ax3.set_xlim(min_val, max_val)
# 添加标题和标签
ax3.set_title("垂直波长分布", fontsize=24)
ax3.set_xlabel("Vertical wavelength(km)")
ax3.set_ylabel("Occurrence(%)")
# 4、水平波长
# 创建bins的边界
min_val = np.floor(filtered_df["hori_wave_len"].min()) # 向下取整
max_val = np.ceil(filtered_df["hori_wave_len"].max()) # 向上取整
bin_width = 100
bins = np.arange(min_val, max_val + bin_width, bin_width)
sns.histplot(filtered_df["hori_wave_len"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax4) # 加上stat='percent'表示算的是频率
# 设置x轴范围
ax4.set_xlim(min_val, max_val)
# 添加标题和标签
ax4.set_title("水平波长分布", fontsize=24)
ax4.set_xlabel("Horizontal wavelength(km)")
ax4.set_ylabel("Occurrence(%)")
# 5、本征相速度
# 纬向本征相速度
# 计算bins的边界
min_val = np.floor(filtered_df["c_x"].min()) # 向下取整
max_val = np.ceil(filtered_df["c_x"].max()) # 向上取整
bin_width = 10
bins = np.arange(min_val, max_val + bin_width, bin_width)
# 创建纬向直方图
sns.histplot(filtered_df["c_x"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax5_1)
ax5_1.set_xlim(min_val, max_val)
ax5_1.set_title("纬向本征相速度", fontsize=24)
ax5_1.set_xlabel("Zonal phase speed (m/s)")
ax5_1.set_ylabel("Occurrence (%)")
# 经向本征相速度
# 计算bins的边界
min_val = np.floor(filtered_df["c_y"].min()) # 向下取整
max_val = np.ceil(filtered_df["c_y"].max()) # 向上取整
bin_width = 10
bins = np.arange(min_val, max_val + bin_width, bin_width)
# 创建经向直方图
sns.histplot(filtered_df["c_y"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax5_2)
ax5_2.set_xlim(min_val, max_val)
ax5_2.set_title("经向本征相速度", fontsize=24)
ax5_2.set_xlabel("Meridional phase speed (m/s)")
ax5_2.set_ylabel("Occurrence (%)")
# 垂直本征相速度
# 计算bins的边界
min_val = filtered_df["c_z"].min() # -1.148
max_val = filtered_df["c_z"].max() # 0.624
bin_width = 0.1
bins = np.arange(min_val, max_val + bin_width, bin_width)
# 创建垂直直方图
sns.histplot(filtered_df["c_z"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax5_3)
ax5_3.set_xlim(min_val, max_val)
ax5_3.set_title("垂直本征相速度", fontsize=24)
ax5_3.set_xlabel("Vertical phase speed (m/s)")
ax5_3.set_ylabel("Occurrence (%)")
# 6、扰动振幅
# 纬向扰动振幅
# 计算bins的边界
min_val = np.floor(filtered_df["u1"].min()) # 向下取整
max_val = np.ceil(filtered_df["u1"].max()) # 向上取整
bin_width = 0.5
bins = np.arange(min_val, max_val + bin_width, bin_width)
# 创建纬向直方图
sns.histplot(filtered_df["u1"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax6_1)
ax6_1.set_xlim(min_val, max_val)
ax6_1.set_title(" ", fontsize=24)
ax6_1.set_xlabel("Zonal wind amplitude (m/s)")
ax6_1.set_ylabel("Occurrence (%)")
# 经向扰动振幅
# 计算bins的边界
min_val = np.floor(filtered_df["v1"].min()) # 向下取整
max_val = np.ceil(filtered_df["v1"].max()) # 向上取整
bin_width = 0.5
bins = np.arange(min_val, max_val + bin_width, bin_width)
# 创建经向直方图
sns.histplot(filtered_df["v1"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax6_2)
ax6_2.set_xlim(min_val, max_val)
ax6_2.set_title("扰动振幅统计结果", fontsize=24)
ax6_2.set_xlabel("Meridional wind amplitude (m/s)")
ax6_2.set_ylabel("Occurrence (%)")
# 垂直扰动振幅
# 计算bins的边界
min_val = np.floor(filtered_df["T1"].min()) # 向下取整
max_val = np.ceil(filtered_df["T1"].max()) # 向上取整
bin_width = 0.5
bins = np.arange(min_val, max_val + bin_width, bin_width)
# 创建垂直直方图
sns.histplot(filtered_df["T1"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax6_3)
ax6_3.set_xlim(min_val, max_val)
ax6_3.set_title(" ", fontsize=24)
ax6_3.set_xlabel("Temperature amplitude (K)")
ax6_3.set_ylabel("Occurrence (%)")
# 7、动量通量
# 挑选出向上传的重力波
filtered_df1 = filtered_df[filtered_df["a"] == 1]
# 绘制第一个子图
# 计算bins的边界
min_val = np.floor(filtered_df1["MFu"].min()) # 向下取整
max_val = np.ceil(filtered_df1["MFu"].max()) # 向上取整
bin_width = 0.1
bins = np.arange(min_val, max_val + bin_width, bin_width)
sns.histplot(filtered_df1["MFu"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax7_1)
ax7_1.set_xlim(min_val, max_val) # 设置x轴范围
ax7_1.set_title("纬向动量通量统计结果", fontsize=24) # 添加标题
ax7_1.set_xlabel("Zonal momentum flux(mPa)") # x轴标签
ax7_1.set_ylabel("Occurrence(%)") # y轴标签
# 绘制第二个子图
# 计算bins的边界
min_val = np.floor(filtered_df1["MFv"].min()) # 向下取整
max_val = np.ceil(filtered_df1["MFv"].max()) # 向上取整
bin_width = 0.1
bins = np.arange(min_val, max_val + bin_width, bin_width)
sns.histplot(filtered_df1["MFv"], bins=bins, kde=False, edgecolor="black", stat="percent", ax=ax7_2)
ax7_2.set_xlim(min_val, max_val) # 设置x轴范围
ax7_2.set_title("经向动量通量统计结果", fontsize=24) # 添加标题
ax7_2.set_xlabel("Meridional momentum flux(mPa)") # x轴标签
ax7_2.set_ylabel("Occurrence(%)") # y轴标签
# 10、水平传播方向
filtered_df["date1"] = filtered_df["date"].str.split("T").str[0] # 再加一列,只保留日期部分
filtered_df["date1"] = pd.to_datetime(filtered_df["date1"], format="%Y%m%d") # 确保 'date1' 列是日期格式
# 添加季节列
def get_season(date):
month = date.month
if month in [12, 1, 2]:
return "Winter"
elif month in [3, 4, 5]:
return "Spring"
elif month in [6, 7, 8]:
return "Summer"
else:
return "Fall"
filtered_df["season"] = filtered_df["date1"].apply(get_season) # 添加季节列
seasons = ["Winter", "Spring", "Summer", "Fall"]
for ax, season in zip(ax10, seasons):
season_data = filtered_df[filtered_df["season"] == season]
windrose = WindroseAxes.from_ax(ax)
ax.set_title(season, fontsize=18)
windrose.bar(season_data["b"], np.ones_like(season_data["b"]), normed=False) # normed=True表示占每个季节的占比
# # 添加总标题
# fig.suptitle("水平传播方向在各个季节的分布变化", fontsize=16, fontweight="bold")
# 8、垂直传播方向
# 设置 日期'date1' 列为索引
filtered_df.set_index("date1", inplace=True)
# 按月份分组并计算百分比
monthly_stats_df = (
filtered_df.groupby([filtered_df.index.month, filtered_df.index.year])
.apply(lambda x: pd.Series({"Upload (%)": (x["a"] == 1).mean() * 100, "Downward (%)": (x["a"] == -1).mean() * 100}))
.reset_index(level=1, drop=True)
)
# 按月份汇总这些年的数据
monthly_avg_stats_df = monthly_stats_df.groupby(level=0).mean()
# 确保索引是 numpy 数组
dates = monthly_avg_stats_df.index.to_numpy()
# 绘制折线图
ax8.plot(dates, monthly_avg_stats_df["Upload (%)"].to_numpy(), marker="o", label="Up (%)")
ax8.plot(dates, monthly_avg_stats_df["Downward (%)"].to_numpy(), marker="o", label="Down (%)")
# 设置月份标签
ax8.set_xticks(
ticks=np.arange(1, 13), labels=["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"], rotation=0
) # 不倾斜rotation=0表示倾斜45°
# 添加图例、标题和坐标轴标签
ax8.legend()
ax8.set_title("每月上传/下传重力波占比", fontsize=24)
ax8.set_xlabel("Month")
ax8.set_ylabel("Percentage (%)")
# 9、动能和势能
filtered_df.reset_index(inplace=True) # 取消索引并恢复为默认整数索引
# 提取年月
filtered_df["year_month"] = filtered_df["date1"].dt.to_period("M")
# 计算每个月的动能和势能的平均值
monthly_avg = filtered_df.groupby("year_month")[["Ek", "E_p"]].mean()
# 创建完整的月份范围(因为有的月份没数据)
full_range = pd.period_range(start=monthly_avg.index.min(), end=monthly_avg.index.max(), freq="M")
# 创建一个新的 DataFrame 使用完整的年份月份范围
full_monthly_avg = pd.DataFrame(index=full_range)
# 将原始数据合并到新的 DataFrame 中
full_monthly_avg = full_monthly_avg.join(monthly_avg)
# 确保 'Ek' 和 'E_p' 列为数值型
full_monthly_avg["Ek"] = pd.to_numeric(full_monthly_avg["Ek"], errors="coerce")
full_monthly_avg["E_p"] = pd.to_numeric(full_monthly_avg["E_p"], errors="coerce")
# 只显示每年6月、12月简化图形
# 绘制 Ek、E_p
ax9.plot(full_monthly_avg.index.values.astype(str), full_monthly_avg["Ek"].values, marker="o", linestyle="-", color="r", label="动能月平均值")
ax9.plot(full_monthly_avg.index.values.astype(str), full_monthly_avg["E_p"].values, marker="o", linestyle="-", color="b", label="势能月平均值")
# 添加标题和标签
ax9.set_title("动能和势能分布情况", fontsize=24)
ax9.set_xlabel("Month", fontsize=14)
ax9.set_ylabel("Wave energy (J/kg)", fontsize=14)
# 设定横轴标签
months = full_monthly_avg.index.values.astype(str)
# 获取所有年份的6月和12月的索引
june_indices = [i for i, date in enumerate(months) if date.endswith("-06")]
december_indices = [i for i, date in enumerate(months) if date.endswith("-12")]
selected_indices = june_indices + december_indices
# 只显示选定的标签
ax9.set_xticks(ticks=selected_indices, labels=[months[i] for i in selected_indices], rotation=45)
# 添加网格和图例
# plt.grid()
ax9.legend() # 显示图例
plt.rcParams["font.sans-serif"] = ["SimHei"] # 显示中文
plt.rcParams["axes.unicode_minus"] = False # 正常显示负号
plt.tight_layout()
plt.savefig(path)
plt.close()
return True