# Original file: TIDI yearly gravity-wave processing loop
import os
import pandas as pd
import numpy as np
from scipy.io import loadmat
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import seaborn as sns
from CONSTANT import DATA_BASEPATH
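# DATA_BASEPATH is provided by the project-level CONSTANT module. As a rough,
# hypothetical sketch of the assumed shape (illustration only, not the actual
# definition), it behaves like a namespace whose `tidi` attribute is the root
# directory of the TIDI .mat files:
#
#     from types import SimpleNamespace
#     DATA_BASEPATH = SimpleNamespace(tidi="/path/to/tidi")  # hypothetical path
#
# so that rf"{DATA_BASEPATH.tidi}/{year}/{day:03d}_Height.mat" resolves to one
# daily file.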
# ---------------------------------------------------------------------------------------
# -----vzonal----------------------------------------------------------------------------
def process_vzonal_day(day, year=2015):
try:
# Load the daily .mat files
base_path = DATA_BASEPATH.tidi
height_data = loadmat(rf"{base_path}/{year}/{day:03d}_Height.mat")
lat_data = loadmat(rf"{base_path}/{year}/{day:03d}_Lat.mat")
lon_data = loadmat(rf"{base_path}/{year}/{day:03d}_Lon.mat")
vmeridional_data = loadmat(
rf"{base_path}/{year}/{day:03d}_VMerdional.mat")
vzonal_data = loadmat(rf"{base_path}/{year}/{day:03d}_Vzonal.mat")
# Convert each array to a DataFrame ('VMerdional' matches the misspelled variable name stored in the .mat files)
height_df = pd.DataFrame(height_data['Height'])
lat_df = pd.DataFrame(lat_data['Lat'])
lon_df = pd.DataFrame(lon_data['Lon'])
vmeridional_df = pd.DataFrame(vmeridional_data['VMerdional'])
vzonal_df = pd.DataFrame(vzonal_data['Vzonal'])
# Combine longitude and latitude into a two-column DataFrame
lon_lat_df = pd.concat([lon_df, lat_df], axis=1)
lon_lat_df.columns = ['Longitude', 'Latitude']
# Select data in the 0-20 degree latitude band
lat_filter = (lat_df.values >= 0) & (lat_df.values <= 20)
# Apply the latitude mask
vmeridional_filtered = vmeridional_df.iloc[:, lat_filter.flatten()]
vzonal_filtered = vzonal_df.iloc[:, lat_filter.flatten()]
lon_lat_filtered = lon_lat_df.iloc[lat_filter.flatten(), :]
# Bin the longitudes of lon_lat_filtered into 30-degree intervals from 0 to 360 degrees
bins = range(0, 361, 30)
group_labels = [f"{i}-{i + 29}" for i in range(0, 360, 30)]
lon_lat_filtered['Longitude_Group'] = pd.cut(
lon_lat_filtered['Longitude'], bins=bins, labels=group_labels)
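# pd.cut uses half-open intervals here, e.g. a longitude of 37.5 falls in the
# (30, 60] bin and is labelled '30-59'; values outside (0, 360] become NaN.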
# Collect the unique longitude-group labels and sort them numerically
unique_groups = sorted(lon_lat_filtered['Longitude_Group'].unique(
), key=lambda x: int(x.split('-')[0]))
# For each longitude group, collect the matching vzonal_filtered / vmeridional_filtered columns
grouped_data = {}
insufficient_data_count = 0  # counts groups with too few valid samples
for group in unique_groups:
mask = lon_lat_filtered['Longitude_Group'] == group
grouped_data[group] = {
'vzonal_filtered': vzonal_filtered.loc[:, mask],
'vmeridional_filtered': vmeridional_filtered.loc[:, mask],
'lon_lat_filtered': lon_lat_filtered.loc[mask]
}
# Count the valid (non-NaN) samples in this group
vzonal_count = grouped_data[group]['vzonal_filtered'].notna(
).sum().sum()
vmeridional_count = grouped_data[group]['vmeridional_filtered'].notna(
).sum().sum()
if vzonal_count <= 20 or vmeridional_count <= 20:
insufficient_data_count += 1
# If more than 6 longitude groups lack data, an error could be raised (disabled):
# if insufficient_data_count > 6:
# raise ValueError("Insufficient data for more than 6 longitude groups in the specified latitude band.")
# Reaching this point means every group has enough data (or at most 6 groups fall short)
print("All longitude groups have sufficient data")
# ----------- Compute wn0 (the zonal-mean profile) ------------------------------------------------------------
# Labels for the 12 expected longitude intervals
expected_groups = [f"{i}-{i + 29}" for i in range(0, 360, 30)]
# Empty DataFrame to hold each interval's mean profile, one column per expected label
W0_profiles_df = pd.DataFrame(columns=expected_groups)
# Iterate over every group in grouped_data
for group, data in grouped_data.items():
# Zonal-wind columns of the current group
vzonal_filtered = data['vzonal_filtered']
# Mean profile over the valid samples (NaN-aware)
mean_profile = vzonal_filtered.mean(axis=1, skipna=True)
# Store the group's mean profile as one column of W0_profiles_df
W0_profiles_df[group] = mean_profile
# Add any missing interval columns, filled with NaN
for group in expected_groups:
if group not in W0_profiles_df.columns:
W0_profiles_df[group] = pd.Series(
[float('NaN')] * len(W0_profiles_df))
# Debug: print the concatenated mean profiles
# print("Concatenated mean profiles for all longitude groups:\n", W0_profiles_df)
# Mean over all longitude groups at each height
height_mean_profiles = W0_profiles_df.mean(axis=1)
# Append the per-height mean as a new column; All_Heights_Mean is wn0
W0_profiles_df['All_Heights_Mean'] = height_mean_profiles
wn0_df = W0_profiles_df['All_Heights_Mean']
# ------- Compute the residuals --------------------------------------------------------------------------------
# Residual for each longitude interval (its value minus All_Heights_Mean at that height)
residuals_df = W0_profiles_df.drop(columns='All_Heights_Mean').subtract(
W0_profiles_df['All_Heights_Mean'], axis=0)
# --------wn1-------------------------
def single_harmonic(x, A, phi, C):
return A * np.sin(2 * np.pi / 12 * x + phi) + C
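# The residual field is sampled on 12 longitude bins, so a sine with a period of
# 12 bins is the zonal wavenumber-1 component:
#     y(x) = A * sin(2*pi*x/12 + phi) + C
# The same template is refit below with periods of 6, 4, 3 and 2.4 bins
# (wavenumbers 2-5), each time on the residual left by the previous fit.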
# Fitted parameters for each height
fit_results = []
for index, row in residuals_df.iterrows():
# Skip the fit and use zero parameters if the row contains NaN
if row.isnull().any():
fit_results.append((0, 0, 0))
continue
x = np.arange(12)  # the 12 longitude bins as the independent variable
y = row.values
try:
# Least-squares harmonic fit
popt, _ = curve_fit(single_harmonic, x, y)
fit_results.append(popt)
except RuntimeError:
# Fall back to zero parameters if the fit fails (e.g. does not converge)
fit_results.append((0, 0, 0))
# Collect the fitted parameters into a DataFrame
fit_results_df = pd.DataFrame(fit_results, columns=['A', 'phi', 'C'])
# print(fit_results_df)
# Reconstructed wn1 values for each height
wn1_values = []
for index, row in fit_results_df.iterrows():
A, phi, C = row
x = np.arange(12)  # the same 12 longitude bins
wn1 = single_harmonic(x, A, phi, C)
wn1_values.append(wn1)
# Convert the fitted values to a DataFrame
wn1_df = pd.DataFrame(wn1_values, columns=[
f'wn1_{i}' for i in range(12)])
# print(wn1_df)
# If wn1_df is all zeros, skip the remaining steps and return all-NaN for this day
if (wn1_df == 0).all().all():
return pd.Series(np.nan, index=range(21))
# ------------ Compute temp - wn0 - wn1 ---------------------------------------------------------
temp_wn0_wn1 = residuals_df.values - wn1_df.values
# Back to a DataFrame
temp_wn0_wn1_df = pd.DataFrame(
temp_wn0_wn1, columns=residuals_df.columns, index=residuals_df.index)
# -------wn2--------------------------------------------------------------------------
def single_harmonic(x, A, phi, C):
return A * np.sin(2 * np.pi / 6 * x + phi) + C
# Fitted parameters for each height
fit_results2 = []
for index, row in temp_wn0_wn1_df.iterrows():
# Skip the fit and use zero parameters if the row contains NaN
if row.isnull().any():
fit_results2.append((0, 0, 0))
continue
x = np.arange(12)  # the 12 longitude bins as the independent variable
y = row.values
try:
# Least-squares harmonic fit
popt, _ = curve_fit(single_harmonic, x, y)
fit_results2.append(popt)
except RuntimeError:
# Fall back to zero parameters if the fit fails (e.g. does not converge)
fit_results2.append((0, 0, 0))
# Collect the fitted parameters into a DataFrame
fit_results2_df = pd.DataFrame(fit_results2, columns=['A', 'phi', 'C'])
# print(fit_results2_df)
# Reconstructed wn2 values for each height
wn2_values = []
for index, row in fit_results2_df.iterrows():
A, phi, C = row
x = np.arange(12)  # the same 12 longitude bins
wn2 = single_harmonic(x, A, phi, C)
wn2_values.append(wn2)
# Convert the fitted values to a DataFrame
wn2_df = pd.DataFrame(wn2_values, columns=[
f'wn2_{i}' for i in range(12)])
# print(wn2_df)
# --------- Compute temp - wn0 - wn1 - wn2 ------------------------------------------------------
temp_wn0_wn1_wn2 = temp_wn0_wn1_df.values - wn2_df.values
# Back to a DataFrame
temp_wn0_wn1_wn2_df = pd.DataFrame(
temp_wn0_wn1_wn2, columns=temp_wn0_wn1_df.columns)
# -------wn3-----------------------------------------------------------------------
def single_harmonic(x, A, phi, C):
return A * np.sin(2 * np.pi / 4 * x + phi) + C
# Fitted parameters for each height
fit_results3 = []
for index, row in temp_wn0_wn1_wn2_df.iterrows():
# Skip the fit and use zero parameters if the row contains NaN
if row.isnull().any():
fit_results3.append((0, 0, 0))
continue
x = np.arange(12)  # the 12 longitude bins as the independent variable
y = row.values
try:
# Least-squares harmonic fit
popt, _ = curve_fit(single_harmonic, x, y)
fit_results3.append(popt)
except RuntimeError:
# Fall back to zero parameters if the fit fails (e.g. does not converge)
fit_results3.append((0, 0, 0))
# Collect the fitted parameters into a DataFrame
fit_results3_df = pd.DataFrame(fit_results3, columns=['A', 'phi', 'C'])
# print(fit_results3_df)
# Reconstructed wn3 values for each height
wn3_values = []
for index, row in fit_results3_df.iterrows():
A, phi, C = row
x = np.arange(12)  # the same 12 longitude bins
wn3 = single_harmonic(x, A, phi, C)
wn3_values.append(wn3)
# Convert the fitted values to a DataFrame
wn3_df = pd.DataFrame(wn3_values, columns=[
f'wn3_{i}' for i in range(12)])
# print(wn3_df)
# --------- Compute temp - wn0 - wn1 - wn2 - wn3 ------------------------------------------------------
temp_wn0_wn1_wn2_wn3 = temp_wn0_wn1_wn2_df.values - wn3_df.values
# Back to a DataFrame
temp_wn0_wn1_wn2_wn3_df = pd.DataFrame(
temp_wn0_wn1_wn2_wn3, columns=temp_wn0_wn1_wn2_df.columns)
# -------wn4 - ----------------------------------------------------------------------
def single_harmonic(x, A, phi, C):
return A * np.sin(2 * np.pi / 3 * x + phi) + C
# Fitted parameters for each height
fit_results4 = []
for index, row in temp_wn0_wn1_wn2_wn3_df.iterrows():
# Skip the fit and use zero parameters if the row contains NaN
if row.isnull().any():
fit_results4.append((0, 0, 0))
continue
x = np.arange(12)  # the 12 longitude bins as the independent variable
y = row.values
try:
# Least-squares harmonic fit
popt, _ = curve_fit(single_harmonic, x, y)
fit_results4.append(popt)
except RuntimeError:
# Fall back to zero parameters if the fit fails (e.g. does not converge)
fit_results4.append((0, 0, 0))
fit_results4_df = pd.DataFrame(fit_results4, columns=['A', 'phi', 'C'])
# print(fit_results4_df)
# Reconstructed wn4 values for each height
wn4_values = []
for index, row in fit_results4_df.iterrows():
A, phi, C = row
x = np.arange(12)  # the same 12 longitude bins
wn4 = single_harmonic(x, A, phi, C)
wn4_values.append(wn4)
# Convert the fitted values to a DataFrame
wn4_df = pd.DataFrame(wn4_values, columns=[
f'wn4_{i}' for i in range(12)])
# print(wn4_df)
# --------- Compute temp - wn0 - wn1 - wn2 - wn3 - wn4 ------------------------------------------------------
temp_wn0_wn1_wn2_wn3_wn4 = temp_wn0_wn1_wn2_wn3_df.values - wn4_df.values
# Back to a DataFrame
temp_wn0_wn1_wn2_wn3_wn4_df = pd.DataFrame(
temp_wn0_wn1_wn2_wn3_wn4, columns=temp_wn0_wn1_wn2_wn3_df.columns)
# -------wn5-----------------------------------------------------------------------
def single_harmonic(x, A, phi, C):
return A * np.sin(2 * np.pi / 2.4 * x + phi) + C
# Fitted parameters for each height
fit_results5 = []
for index, row in temp_wn0_wn1_wn2_wn3_wn4_df.iterrows():
# Skip the fit and use zero parameters if the row contains NaN
if row.isnull().any():
fit_results5.append((0, 0, 0))
continue
x = np.arange(12)  # the 12 longitude bins as the independent variable
y = row.values
try:
# Least-squares harmonic fit
popt, _ = curve_fit(single_harmonic, x, y)
fit_results5.append(popt)
except RuntimeError:
# Fall back to zero parameters if the fit fails (e.g. does not converge)
fit_results5.append((0, 0, 0))
# Collect the fitted parameters into a DataFrame
fit_results5_df = pd.DataFrame(fit_results5, columns=['A', 'phi', 'C'])
# print(fit_results5_df)
# Reconstructed wn5 values for each height
wn5_values = []
for index, row in fit_results5_df.iterrows():
A, phi, C = row
x = np.arange(12)  # the same 12 longitude bins
wn5 = single_harmonic(x, A, phi, C)
wn5_values.append(wn5)
# Convert the fitted values to a DataFrame
wn5_df = pd.DataFrame(wn5_values, columns=[
f'wn5_{i}' for i in range(12)])
# print(wn5_df)
# --------- Compute temp - wn0 - wn1 - wn2 - wn3 - wn4 - wn5 ------------------------------------------------------
temp_wn0_wn1_wn2_wn3_wn4_wn5 = temp_wn0_wn1_wn2_wn3_wn4_df.values - wn5_df.values
# Back to a DataFrame
temp_wn0_wn1_wn2_wn3_wn4_wn5_df = pd.DataFrame(temp_wn0_wn1_wn2_wn3_wn4_wn5,
columns=temp_wn0_wn1_wn2_wn3_wn4_df.columns)
# ------ Background field = wn0 + wn1 + wn2 + wn3 + wn4 + wn5 ---------------------------------------------------
background = wn5_df.values + wn4_df.values + \
wn3_df.values + wn2_df.values + wn1_df.values
# wn0 is a single column, so add it separately
# Use np.isnan to skip NaN (and zero) wn0 values so they do not contribute to the sum
for i in range(21):
wn0_value = wn0_df.iloc[i]
# Add wn0_value to background only when it is neither NaN nor 0
if not np.isnan(wn0_value) and wn0_value != 0:
background[i, :] += wn0_value
# Perturbation left after removing wn0-wn5
perturbation = temp_wn0_wn1_wn2_wn3_wn4_wn5_df
# --------- Fourier filtering ----------------------------------------------------------------------
# New DataFrame to hold the filtered perturbation profiles
result = pd.DataFrame(
np.nan, index=perturbation.index, columns=perturbation.columns)
# Band-pass limits (vertical wavelengths of 2-15 km)
lambda_low = 2 # 2 km
lambda_high = 15 # 15 km
f_low = 2 * np.pi / lambda_high
f_high = 2 * np.pi / lambda_low
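# The pass band keeps FFT components with f_low <= f <= f_high, i.e. the window
# 2*pi/15 .. 2*pi/2 derived from the 15 km and 2 km wavelength limits above;
# everything outside that window is zeroed before the inverse FFT.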
# Filter each perturbation column independently
for col in perturbation.columns:
x = perturbation[col]
# Keep only the valid (non-NaN) samples
valid_values = x.dropna()
N = len(valid_values)  # number of valid samples
# Index of the first valid sample (used to look up its height)
first_valid_index = valid_values.index[0] if not valid_values.index.empty else None
height_value = height_df.loc[first_valid_index] if first_valid_index is not None else None
# Skip the column if there are no valid samples
if N == 0 or height_value is None:
continue
# Sample grid (based on height) and frequency axis
dt = 0.25
n = np.arange(N)
t = height_value.values + n * dt
f = n / (N * dt)
# Forward FFT
y = np.fft.fft(valid_values.values)
# Frequency-domain band-pass filter
yy = y.copy()
freq_filter = (f < f_low) | (f > f_high)
yy[freq_filter] = 0  # zero out components outside the pass band
# Inverse FFT
perturbation_after = np.real(np.fft.ifft(yy))
# Write the filtered values back into the result matrix
result.loc[valid_values.index, col] = perturbation_after
u2 = result ** 2
u2 = u2.mean(axis=1)
return u2
except FileNotFoundError:
# If any input file for this day is missing, return an all-NaN Series
expected_length = 21
return pd.Series(np.nan, index=range(expected_length))
# Initialize an empty DataFrame to store all days' results
# -------------------------------------------------------------------------------------------
# --------meridional-------------------------------------------------------------------------
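# process_vmeridional_day mirrors process_vzonal_day: the only difference is that
# the per-group mean profile (and hence the returned squared perturbation) is
# computed from the meridional wind instead of the zonal wind.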
def process_vmeridional_day(day, year=2015):
try:
# Load the daily .mat files
base_path = DATA_BASEPATH.tidi
height_data = loadmat(rf"{base_path}/{year}/{day:03d}_Height.mat")
lat_data = loadmat(rf"{base_path}/{year}/{day:03d}_Lat.mat")
lon_data = loadmat(rf"{base_path}/{year}/{day:03d}_Lon.mat")
vmeridional_data = loadmat(
rf"{base_path}/{year}/{day:03d}_VMerdional.mat")
vzonal_data = loadmat(rf"{base_path}/{year}/{day:03d}_Vzonal.mat")
# Convert each array to a DataFrame ('VMerdional' matches the misspelled variable name stored in the .mat files)
height_df = pd.DataFrame(height_data['Height'])
lat_df = pd.DataFrame(lat_data['Lat'])
lon_df = pd.DataFrame(lon_data['Lon'])
vmeridional_df = pd.DataFrame(vmeridional_data['VMerdional'])
vzonal_df = pd.DataFrame(vzonal_data['Vzonal'])
# Combine longitude and latitude into a two-column DataFrame
lon_lat_df = pd.concat([lon_df, lat_df], axis=1)
lon_lat_df.columns = ['Longitude', 'Latitude']
# Select data in the 0-20 degree latitude band
lat_filter = (lat_df.values >= 0) & (lat_df.values <= 20)
# Apply the latitude mask
vmeridional_filtered = vmeridional_df.iloc[:, lat_filter.flatten()]
vzonal_filtered = vzonal_df.iloc[:, lat_filter.flatten()]
lon_lat_filtered = lon_lat_df.iloc[lat_filter.flatten(), :]
# Bin the longitudes of lon_lat_filtered into 30-degree intervals from 0 to 360 degrees
bins = range(0, 361, 30)
group_labels = [f"{i}-{i + 29}" for i in range(0, 360, 30)]
lon_lat_filtered['Longitude_Group'] = pd.cut(
lon_lat_filtered['Longitude'], bins=bins, labels=group_labels)
# Collect the unique longitude-group labels and sort them numerically
unique_groups = sorted(lon_lat_filtered['Longitude_Group'].unique(
), key=lambda x: int(x.split('-')[0]))
# For each longitude group, collect the matching vzonal_filtered / vmeridional_filtered columns
grouped_data = {}
insufficient_data_count = 0  # counts groups with too few valid samples
for group in unique_groups:
mask = lon_lat_filtered['Longitude_Group'] == group
grouped_data[group] = {
'vzonal_filtered': vzonal_filtered.loc[:, mask],
'vmeridional_filtered': vmeridional_filtered.loc[:, mask],
'lon_lat_filtered': lon_lat_filtered.loc[mask]
}
# Count the valid (non-NaN) samples in this group
vzonal_count = grouped_data[group]['vzonal_filtered'].notna(
).sum().sum()
vmeridional_count = grouped_data[group]['vmeridional_filtered'].notna(
).sum().sum()
if vzonal_count <= 20 or vmeridional_count <= 20:
insufficient_data_count += 1
# If more than 6 longitude groups lack data, an error could be raised (disabled):
# if insufficient_data_count > 6:
# raise ValueError(
# "Insufficient data for more than 6 longitude groups in the specified latitude band.")
# Reaching this point means every group has enough data (or at most 6 groups fall short)
print("All longitude groups have sufficient data")
# ----------- Compute wn0 (the zonal-mean profile) ------------------------------------------------------------
# Labels for the 12 expected longitude intervals
expected_groups = [f"{i}-{i + 29}" for i in range(0, 360, 30)]
# Empty DataFrame to hold each interval's mean profile, one column per expected label
W0_profiles_df = pd.DataFrame(columns=expected_groups)
# Iterate over every group in grouped_data
for group, data in grouped_data.items():
# Meridional-wind columns of the current group
vmeridional_filtered = data['vmeridional_filtered']
# Mean profile over the valid samples (NaN-aware)
mean_profile = vmeridional_filtered.mean(axis=1, skipna=True)
# Store the group's mean profile as one column of W0_profiles_df
W0_profiles_df[group] = mean_profile
# Add any missing interval columns, filled with NaN
for group in expected_groups:
if group not in W0_profiles_df.columns:
W0_profiles_df[group] = pd.Series(
[float('NaN')] * len(W0_profiles_df))
# Debug: print the concatenated mean profiles
# print("Concatenated mean profiles for all longitude groups:\n", W0_profiles_df)
# Mean over all longitude groups at each height
height_mean_profiles = W0_profiles_df.mean(axis=1)
# Append the per-height mean as a new column; All_Heights_Mean is wn0
W0_profiles_df['All_Heights_Mean'] = height_mean_profiles
wn0_df = W0_profiles_df['All_Heights_Mean']
# ------- Compute the residuals --------------------------------------------------------------------------------
# Residual for each longitude interval (its value minus All_Heights_Mean at that height)
residuals_df = W0_profiles_df.drop(columns='All_Heights_Mean').subtract(
W0_profiles_df['All_Heights_Mean'], axis=0)
# --------wn1-------------------------
def single_harmonic(x, A, phi, C):
return A * np.sin(2 * np.pi / 12 * x + phi) + C
# Fitted parameters for each height
fit_results = []
for index, row in residuals_df.iterrows():
# Skip the fit and use zero parameters if the row contains NaN
if row.isnull().any():
fit_results.append((0, 0, 0))
continue
x = np.arange(12)  # the 12 longitude bins as the independent variable
y = row.values
try:
# Least-squares harmonic fit
popt, _ = curve_fit(single_harmonic, x, y)
fit_results.append(popt)
except RuntimeError:
# Fall back to zero parameters if the fit fails (e.g. does not converge)
fit_results.append((0, 0, 0))
# Collect the fitted parameters into a DataFrame
fit_results_df = pd.DataFrame(fit_results, columns=['A', 'phi', 'C'])
# print(fit_results_df)
# Reconstructed wn1 values for each height
wn1_values = []
for index, row in fit_results_df.iterrows():
A, phi, C = row
x = np.arange(12)  # the same 12 longitude bins
wn1 = single_harmonic(x, A, phi, C)
wn1_values.append(wn1)
# Convert the fitted values to a DataFrame
wn1_df = pd.DataFrame(wn1_values, columns=[
f'wn1_{i}' for i in range(12)])
# print(wn1_df)
# If wn1_df is all zeros, skip the remaining steps and return all-NaN for this day
if (wn1_df == 0).all().all():
return pd.Series(np.nan, index=range(21))
# ------------ Compute temp - wn0 - wn1 ---------------------------------------------------------
temp_wn0_wn1 = residuals_df.values - wn1_df.values
# Back to a DataFrame
temp_wn0_wn1_df = pd.DataFrame(
temp_wn0_wn1, columns=residuals_df.columns, index=residuals_df.index)
# -------wn2--------------------------------------------------------------------------
def single_harmonic(x, A, phi, C):
return A * np.sin(2 * np.pi / 6 * x + phi) + C
# Fitted parameters for each height
fit_results2 = []
for index, row in temp_wn0_wn1_df.iterrows():
# Skip the fit and use zero parameters if the row contains NaN
if row.isnull().any():
fit_results2.append((0, 0, 0))
continue
x = np.arange(12)  # the 12 longitude bins as the independent variable
y = row.values
try:
# Least-squares harmonic fit
popt, _ = curve_fit(single_harmonic, x, y)
fit_results2.append(popt)
except RuntimeError:
# Fall back to zero parameters if the fit fails (e.g. does not converge)
fit_results2.append((0, 0, 0))
# Collect the fitted parameters into a DataFrame
fit_results2_df = pd.DataFrame(fit_results2, columns=['A', 'phi', 'C'])
# print(fit_results2_df)
# Reconstructed wn2 values for each height
wn2_values = []
for index, row in fit_results2_df.iterrows():
A, phi, C = row
x = np.arange(12)  # the same 12 longitude bins
wn2 = single_harmonic(x, A, phi, C)
wn2_values.append(wn2)
# Convert the fitted values to a DataFrame
wn2_df = pd.DataFrame(wn2_values, columns=[
f'wn2_{i}' for i in range(12)])
# print(wn2_df)
# --------- Compute temp - wn0 - wn1 - wn2 ------------------------------------------------------
temp_wn0_wn1_wn2 = temp_wn0_wn1_df.values - wn2_df.values
# Back to a DataFrame
temp_wn0_wn1_wn2_df = pd.DataFrame(
temp_wn0_wn1_wn2, columns=temp_wn0_wn1_df.columns)
# -------wn3-----------------------------------------------------------------------
def single_harmonic(x, A, phi, C):
return A * np.sin(2 * np.pi / 4 * x + phi) + C
# Fitted parameters for each height
fit_results3 = []
for index, row in temp_wn0_wn1_wn2_df.iterrows():
# Skip the fit and use zero parameters if the row contains NaN
if row.isnull().any():
fit_results3.append((0, 0, 0))
continue
x = np.arange(12)  # the 12 longitude bins as the independent variable
y = row.values
try:
# Least-squares harmonic fit
popt, _ = curve_fit(single_harmonic, x, y)
fit_results3.append(popt)
except RuntimeError:
# Fall back to zero parameters if the fit fails (e.g. does not converge)
fit_results3.append((0, 0, 0))
# Collect the fitted parameters into a DataFrame
fit_results3_df = pd.DataFrame(fit_results3, columns=['A', 'phi', 'C'])
# print(fit_results3_df)
# Reconstructed wn3 values for each height
wn3_values = []
for index, row in fit_results3_df.iterrows():
A, phi, C = row
x = np.arange(12)  # the same 12 longitude bins
wn3 = single_harmonic(x, A, phi, C)
wn3_values.append(wn3)
# Convert the fitted values to a DataFrame
wn3_df = pd.DataFrame(wn3_values, columns=[
f'wn3_{i}' for i in range(12)])
# print(wn3_df)
# --------- Compute temp - wn0 - wn1 - wn2 - wn3 ------------------------------------------------------
temp_wn0_wn1_wn2_wn3 = temp_wn0_wn1_wn2_df.values - wn3_df.values
# Back to a DataFrame
temp_wn0_wn1_wn2_wn3_df = pd.DataFrame(
temp_wn0_wn1_wn2_wn3, columns=temp_wn0_wn1_wn2_df.columns)
# -------wn4 - ----------------------------------------------------------------------
def single_harmonic(x, A, phi, C):
return A * np.sin(2 * np.pi / 3 * x + phi) + C
# Fitted parameters for each height
fit_results4 = []
for index, row in temp_wn0_wn1_wn2_wn3_df.iterrows():
# Skip the fit and use zero parameters if the row contains NaN
if row.isnull().any():
fit_results4.append((0, 0, 0))
continue
x = np.arange(12)  # the 12 longitude bins as the independent variable
y = row.values
try:
# Least-squares harmonic fit
popt, _ = curve_fit(single_harmonic, x, y)
fit_results4.append(popt)
except RuntimeError:
# Fall back to zero parameters if the fit fails (e.g. does not converge)
fit_results4.append((0, 0, 0))
fit_results4_df = pd.DataFrame(fit_results4, columns=['A', 'phi', 'C'])
# print(fit_results4_df)
# Reconstructed wn4 values for each height
wn4_values = []
for index, row in fit_results4_df.iterrows():
A, phi, C = row
x = np.arange(12)  # the same 12 longitude bins
wn4 = single_harmonic(x, A, phi, C)
wn4_values.append(wn4)
# Convert the fitted values to a DataFrame
wn4_df = pd.DataFrame(wn4_values, columns=[
f'wn4_{i}' for i in range(12)])
# print(wn4_df)
# --------- Compute temp - wn0 - wn1 - wn2 - wn3 - wn4 ------------------------------------------------------
temp_wn0_wn1_wn2_wn3_wn4 = temp_wn0_wn1_wn2_wn3_df.values - wn4_df.values
# Back to a DataFrame
temp_wn0_wn1_wn2_wn3_wn4_df = pd.DataFrame(
temp_wn0_wn1_wn2_wn3_wn4, columns=temp_wn0_wn1_wn2_wn3_df.columns)
# -------wn5-----------------------------------------------------------------------
def single_harmonic(x, A, phi, C):
return A * np.sin(2 * np.pi / 2.4 * x + phi) + C
# Fitted parameters for each height
fit_results5 = []
for index, row in temp_wn0_wn1_wn2_wn3_wn4_df.iterrows():
# Skip the fit and use zero parameters if the row contains NaN
if row.isnull().any():
fit_results5.append((0, 0, 0))
continue
x = np.arange(12)  # the 12 longitude bins as the independent variable
y = row.values
try:
# Least-squares harmonic fit
popt, _ = curve_fit(single_harmonic, x, y)
fit_results5.append(popt)
except RuntimeError:
# Fall back to zero parameters if the fit fails (e.g. does not converge)
fit_results5.append((0, 0, 0))
# Collect the fitted parameters into a DataFrame
fit_results5_df = pd.DataFrame(fit_results5, columns=['A', 'phi', 'C'])
# print(fit_results5_df)
# Reconstructed wn5 values for each height
wn5_values = []
for index, row in fit_results5_df.iterrows():
A, phi, C = row
x = np.arange(12)  # the same 12 longitude bins
wn5 = single_harmonic(x, A, phi, C)
wn5_values.append(wn5)
# Convert the fitted values to a DataFrame
wn5_df = pd.DataFrame(wn5_values, columns=[
f'wn5_{i}' for i in range(12)])
# print(wn5_df)
# --------- Compute temp - wn0 - wn1 - wn2 - wn3 - wn4 - wn5 ------------------------------------------------------
temp_wn0_wn1_wn2_wn3_wn4_wn5 = temp_wn0_wn1_wn2_wn3_wn4_df.values - wn5_df.values
# Back to a DataFrame
temp_wn0_wn1_wn2_wn3_wn4_wn5_df = pd.DataFrame(temp_wn0_wn1_wn2_wn3_wn4_wn5,
columns=temp_wn0_wn1_wn2_wn3_wn4_df.columns)
# ------ Background field = wn0 + wn1 + wn2 + wn3 + wn4 + wn5 ---------------------------------------------------
background = wn5_df.values + wn4_df.values + \
wn3_df.values + wn2_df.values + wn1_df.values
# wn0 is a single column, so add it separately
# Use np.isnan to skip NaN (and zero) wn0 values so they do not contribute to the sum
for i in range(21):
wn0_value = wn0_df.iloc[i]
# Add wn0_value to background only when it is neither NaN nor 0
if not np.isnan(wn0_value) and wn0_value != 0:
background[i, :] += wn0_value
# Perturbation left after removing wn0-wn5
perturbation = temp_wn0_wn1_wn2_wn3_wn4_wn5_df
# --------- Fourier filtering ----------------------------------------------------------------------
# New DataFrame to hold the filtered perturbation profiles
result = pd.DataFrame(
np.nan, index=perturbation.index, columns=perturbation.columns)
# Band-pass limits (vertical wavelengths of 2-15 km)
lambda_low = 2 # 2 km
lambda_high = 15 # 15 km
f_low = 2 * np.pi / lambda_high
f_high = 2 * np.pi / lambda_low
# Filter each perturbation column independently
for col in perturbation.columns:
x = perturbation[col]
# Keep only the valid (non-NaN) samples
valid_values = x.dropna()
N = len(valid_values)  # number of valid samples
# Index of the first valid sample (used to look up its height)
first_valid_index = valid_values.index[0] if not valid_values.index.empty else None
height_value = height_df.loc[first_valid_index] if first_valid_index is not None else None
# Skip the column if there are no valid samples
if N == 0 or height_value is None:
continue
# Sample grid (based on height) and frequency axis
dt = 0.25
n = np.arange(N)
t = height_value.values + n * dt
f = n / (N * dt)
# Forward FFT
y = np.fft.fft(valid_values.values)
# Frequency-domain band-pass filter
yy = y.copy()
freq_filter = (f < f_low) | (f > f_high)
yy[freq_filter] = 0  # zero out components outside the pass band
# Inverse FFT
perturbation_after = np.real(np.fft.ifft(yy))
# Write the filtered values back into the result matrix
result.loc[valid_values.index, col] = perturbation_after
v2 = result ** 2
v2 = v2.mean(axis=1)
return v2
except FileNotFoundError:
# If any input file for this day is missing, return an all-NaN Series
expected_length = 21
return pd.Series(np.nan, index=range(expected_length))
days_in_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
# Convert a day-of-year to its English month name
def day_to_month(day):
# Accumulate the days of each month to find the month containing this day
cumulative_days = 0
for i, days in enumerate(days_in_month):
cumulative_days += days
if day <= cumulative_days:
return f'{["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"][i]}'
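# Examples (non-leap 365-day year, as encoded in days_in_month):
#   day_to_month(31)  -> "January"
#   day_to_month(32)  -> "February"
#   day_to_month(364) -> "December"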
class TidiGravityWPlotMonthly:
def __init__(self, year):
self.year = year
cache_path = f"{DATA_BASEPATH.tidi}/cache"
if os.path.exists(f"{cache_path}/{year}/all_days_vzonal_results.parquet") \
and os.path.exists(f"{cache_path}/{year}/all_days_vmeridional_results.parquet"):
all_days_vzonal_results = pd.read_parquet(
f"{cache_path}/{year}/all_days_vzonal_results.parquet")
all_days_vmeridional_results = pd.read_parquet(
f"{cache_path}/{year}/all_days_vmeridional_results.parquet")
else:
all_days_vzonal_results = pd.DataFrame()
# Process each day (range(1, 365) covers days 1-364)
for day in range(1, 365):
u2 = process_vzonal_day(day, year)
all_days_vzonal_results[rf"{day:02d}"] = u2
# One column per day
all_days_vzonal_results.columns = [
f"{day:02d}" for day in range(1, 365)]
# Empty DataFrame to accumulate the meridional results for every day
all_days_vmeridional_results = pd.DataFrame()
# Process each day (range(1, 365) covers days 1-364)
for day in range(1, 365):
v2 = process_vmeridional_day(day, year)
all_days_vmeridional_results[rf"{day:02d}"] = v2
# One column per day
all_days_vmeridional_results.columns = [
f"{day:02d}" for day in range(1, 365)]
# Cache the results
# Create the cache directory if it does not exist
if not os.path.exists(f"{cache_path}/{year}"):
os.makedirs(f"{cache_path}/{year}")
all_days_vzonal_results.to_parquet(
f"{cache_path}/{year}/all_days_vzonal_results.parquet")
all_days_vmeridional_results.to_parquet(
f"{cache_path}/{year}/all_days_vmeridional_results.parquet")
self.all_days_vzonal_results = all_days_vzonal_results
self.all_days_vmeridional_results = all_days_vmeridional_results
# ---------------------------------------------------------------------------------------------------
# -------- Kinetic energy from the squared zonal and meridional wind perturbations --------------------------------
# Keep the sum only where both tables have non-NaN values (via numpy.where)
sum_df = np.where(
pd.notna(all_days_vmeridional_results) & pd.notna(
all_days_vzonal_results),
all_days_vmeridional_results + all_days_vzonal_results,
np.nan
)
HP = 1/2*all_days_vmeridional_results+1/2*all_days_vzonal_results
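# HP is the gravity-wave kinetic energy per unit mass, E_k = (u'^2 + v'^2) / 2,
# since each daily column already holds the height-resolved mean of the squared,
# band-pass-filtered wind perturbation (u2 / v2 above).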
heights = [70.0, 72.5, 75.0, 77.5, 80.0, 82.5, 85.0, 87.5, 90.0, 92.5,
95.0, 97.5, 100.0, 102.5, 105.0, 107.5, 110.0, 112.5, 115.0, 117.5, 120.0]
HP.index = heights
# # Save the DataFrame to an Excel file
# HP.to_excel('HP_data.xlsx')
# ---------- Yearly statistics plot ------------------------------------------------------------------------------------------------------------
data = HP
# Move the height index into the first column
data = data.reset_index()
h = data.iloc[:, 0].copy()  # heights, kept for the y-axis
dates = list(range(1, data.shape[1]))  # day numbers, used as the x-axis
data0 = data.iloc[:, 1:].copy()  # data to plot
'''Data preparation'''
# Reverse h so that height increases from bottom to top
self.h_reversed = h[::-1].reset_index(drop=True)
data0_reversed = data0[::-1].reset_index(drop=True)
# Replace values greater than 20 with NaN
data0_reversed[data0_reversed > 20] = float('nan')
# Convert day numbers to month names (365-day year)
self.data0_reversed = data0_reversed
self.HP = HP
self.dates = dates
self.months = [day_to_month(day) for day in dates]
def plot_height(self):
h_reversed = self.h_reversed
data0_reversed = self.data0_reversed
dates = self.dates
months = self.months
'''Plotting'''
plt.clf()
plt.rcParams['font.family'] = 'SimHei'  # CJK-capable font
plt.rcParams['font.size'] = 12  # base font size
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
plt.rcParams['font.sans-serif'] = 'Times New Roman'  # Times New Roman for Latin text
plt.rcParams['axes.labelsize'] = 14  # axis-label font size
plt.rcParams['xtick.labelsize'] = 12  # x-tick font size
plt.rcParams['ytick.labelsize'] = 12  # y-tick font size
plt.rcParams['legend.fontsize'] = 16  # legend font size
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
plt.figure(figsize=(10, 6))  # figure size
# Heat map with height on the y-axis and month labels on the x-axis
sns.heatmap(data0_reversed, annot=False, cmap='YlGnBu', linewidths=0.5,
yticklabels=h_reversed, xticklabels=months, cbar_kws={'label': 'Gravity-wave kinetic energy'})
# The x-axis is long, so show ticks at a fixed interval
interval = 34  # x-tick spacing (in days)
plt.xticks(ticks=range(0, len(dates), interval),
labels=months[::interval], rotation=45)  # rotation is optional
# Axis labels
plt.xlabel('')  # x-axis label (left empty)
plt.ylabel('Height')  # y-axis label
# Show the figure
# plt.show()
def plot_energy(self):
HP = self.HP
# -------------- Monthly statistics plot -------------------------------------------------------------------
# Number of day columns in HP
num_cols = HP.shape[1]
# Mean columns computed over ~30-day blocks
mean_cols = []
start = 0
while start < num_cols:
end = start + 30
if end > num_cols:
end = num_cols
# Take the next 30 columns (or whatever remains at the end)
subset = HP.iloc[:, start:end]
# Row-wise mean of this block, giving one new mean column
mean_series = subset.mean(axis=1)
mean_cols.append(mean_series)
start = end
# Combine all mean columns into a new DataFrame
result_df = pd.concat(mean_cols, axis=1)
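# With the 364 daily columns produced above this yields 13 blocks (twelve
# 30-day blocks plus a 4-day remainder); the surplus block is dropped further
# down so the result can be reshaped to 12 monthly values.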
# Natural logarithm of every element
result_df_log = result_df.applymap(lambda x: np.log(x))
# Drop the 70 km row in place
result_df_log.drop(70, axis=0, inplace=True)
# Mean over heights for each monthly block
monthly_average = result_df_log.mean(axis=0)
# Reshape the result to (1, 12)
# If there are more than 12 blocks, drop the last one so the reshape works
if len(monthly_average) > 12:
monthly_average = monthly_average[:-1]
monthly_average = monthly_average.values.reshape(1, 12)
monthly_average = monthly_average.ravel()
# Month labels for the x-axis
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# clear the last plot
plt.clf()
# Line plot of the monthly mean values
plt.plot(months, monthly_average, marker='o', linestyle='-', color='b')
# Title and axis labels
plt.title("Monthly mean energy (log of the result)")
plt.xlabel("")
plt.ylabel("Mean energy")
# Final formatting before display
plt.xticks(rotation=45)  # rotate the month labels for readability
plt.grid(True)
plt.tight_layout()
# Show the figure
# plt.show()
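

# Minimal usage sketch (assumption: the module is run as a script; the output
# file names below are hypothetical and only illustrate how the class and its
# plotting methods defined above fit together).
if __name__ == "__main__":
    plotter = TidiGravityWPlotMonthly(2015)   # build or load the cached yearly results
    plotter.plot_height()                     # height-time heat map of gravity-wave energy
    plt.savefig("tidi_gw_height_2015.png", dpi=150)
    plotter.plot_energy()                     # monthly mean log-energy curve
    plt.savefig("tidi_gw_energy_2015.png", dpi=150)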