# Original file: TIDI gravity-wave processing loop
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.io import loadmat
from scipy.optimize import curve_fit

from CONSTANT import DATA_BASEPATH


# ---------------------------------------------------------------------------------------
# -----shared daily processing------------------------------------------------------------
def _fit_single_harmonic(residual_df, period):
    """Fit A*sin(2*pi/period*x + phi) + C to every height row of ``residual_df``.

    Each row holds 12 values (one per 30-degree longitude bin); ``period`` is the
    wave period expressed in bins (12, 6, 4, 3 and 2.4 bins correspond to zonal
    wavenumbers 1-5). Rows containing NaN, and rows whose fit does not converge,
    fall back to (A, phi, C) = (0, 0, 0), i.e. an all-zero fitted profile.
    """
    def single_harmonic(x, A, phi, C):
        return A * np.sin(2 * np.pi / period * x + phi) + C

    x = np.arange(12)  # the 12 longitude bins are the independent variable
    fitted_rows = []
    for _, row in residual_df.iterrows():
        # Skip the fit for rows with NaN values and use the zero fallback
        if row.isnull().any():
            fitted_rows.append(np.zeros(12))
            continue
        try:
            popt, _ = curve_fit(single_harmonic, x, row.values)
            fitted_rows.append(single_harmonic(x, *popt))
        except RuntimeError:
            # Fit failed (e.g. did not converge): use the zero fallback
            fitted_rows.append(np.zeros(12))
    return pd.DataFrame(fitted_rows)


def _process_day(day, year, component):
    """Return the squared small-scale perturbation profile of one wind component
    ('vzonal' or 'vmeridional') for a single day."""
    try:
        # Read the daily .mat files
        base_path = DATA_BASEPATH.tidi
        height_data = loadmat(rf"{base_path}/{year}/{day:03d}_Height.mat")
        lat_data = loadmat(rf"{base_path}/{year}/{day:03d}_Lat.mat")
        lon_data = loadmat(rf"{base_path}/{year}/{day:03d}_Lon.mat")
        vmeridional_data = loadmat(rf"{base_path}/{year}/{day:03d}_VMerdional.mat")
        vzonal_data = loadmat(rf"{base_path}/{year}/{day:03d}_Vzonal.mat")

        # Convert the arrays to DataFrames
        height_df = pd.DataFrame(height_data['Height'])
        lat_df = pd.DataFrame(lat_data['Lat'])
        lon_df = pd.DataFrame(lon_data['Lon'])
        vmeridional_df = pd.DataFrame(vmeridional_data['VMerdional'])
        vzonal_df = pd.DataFrame(vzonal_data['Vzonal'])

        # Combine longitude and latitude into a two-column DataFrame
        lon_lat_df = pd.concat([lon_df, lat_df], axis=1)
        lon_lat_df.columns = ['Longitude', 'Latitude']

        # Keep only profiles inside the 0-20 degree latitude band
        lat_filter = (lat_df.values >= 0) & (lat_df.values <= 20)
        vmeridional_filtered = vmeridional_df.iloc[:, lat_filter.flatten()]
        vzonal_filtered = vzonal_df.iloc[:, lat_filter.flatten()]
        lon_lat_filtered = lon_lat_df.iloc[lat_filter.flatten(), :].copy()

        # Group longitude into 30-degree bins covering 0-360 degrees
        bins = range(0, 361, 30)
        group_labels = [f"{i}-{i + 29}" for i in range(0, 360, 30)]
        lon_lat_filtered['Longitude_Group'] = pd.cut(
            lon_lat_filtered['Longitude'], bins=bins, labels=group_labels)

        # Unique longitude-group labels, sorted numerically
        unique_groups = sorted(lon_lat_filtered['Longitude_Group'].dropna().unique(),
                               key=lambda x: int(x.split('-')[0]))

        # Collect the zonal and meridional winds that fall into each longitude bin
        grouped_data = {}
        insufficient_data_count = 0  # number of bins with too few valid samples
        for group in unique_groups:
            mask = lon_lat_filtered['Longitude_Group'] == group
            grouped_data[group] = {
                'vzonal_filtered': vzonal_filtered.loc[:, mask],
                'vmeridional_filtered': vmeridional_filtered.loc[:, mask],
                'lon_lat_filtered': lon_lat_filtered.loc[mask],
            }
            # Count the valid (non-NaN) samples in this bin
            vzonal_count = grouped_data[group]['vzonal_filtered'].notna().sum().sum()
            vmeridional_count = grouped_data[group]['vmeridional_filtered'].notna().sum().sum()
            if vzonal_count <= 20 or vmeridional_count <= 20:
                insufficient_data_count += 1
        # Optionally reject the day when more than 6 bins are under-sampled
        # if insufficient_data_count > 6:
        #     raise ValueError("Insufficient data for more than 6 longitude groups "
        #                      "in the specified latitude band.")
        # Reaching this point means every bin has enough data, or at most 6 bins fall short
        print("All longitude groups contain enough data")

        # -----------wn0: zonal-mean profile--------------------------------------------------
        # The 12 expected longitude-bin labels
        expected_groups = [f"{i}-{i + 29}" for i in range(0, 360, 30)]
        # Mean profile of the selected wind component for each longitude bin, skipping NaNs
        W0_profiles_df = pd.DataFrame(columns=expected_groups)
        for group, data in grouped_data.items():
            W0_profiles_df[group] = data[f'{component}_filtered'].mean(axis=1, skipna=True)
        # Add any bin that received no data as an all-NaN column
        for group in expected_groups:
            if group not in W0_profiles_df.columns:
                W0_profiles_df[group] = pd.Series([float('NaN')] * len(W0_profiles_df))

        # Mean over all longitude bins at each height; All_Heights_Mean is wn0
        height_mean_profiles = W0_profiles_df.mean(axis=1)
        W0_profiles_df['All_Heights_Mean'] = height_mean_profiles
        wn0_df = W0_profiles_df['All_Heights_Mean']

        # -------residuals--------------------------------------------------------------------
        # Residual of each longitude bin relative to the zonal mean at that height
        residuals_df = W0_profiles_df.drop(columns='All_Heights_Mean').subtract(
            W0_profiles_df['All_Heights_Mean'], axis=0)

        # --------wn1-wn5: successive harmonic fits (periods 12, 6, 4, 3, 2.4 bins)-----------
        wn1_df = _fit_single_harmonic(residuals_df, 12)
        # If every wn1 fit fell back to zero, return an all-NaN profile for this day
        if (wn1_df == 0).all().all():
            return pd.Series(np.nan, index=range(21))

        temp_df = pd.DataFrame(residuals_df.values - wn1_df.values,
                               columns=residuals_df.columns, index=residuals_df.index)
        wn2_df = _fit_single_harmonic(temp_df, 6)
        temp_df = pd.DataFrame(temp_df.values - wn2_df.values, columns=temp_df.columns)
        wn3_df = _fit_single_harmonic(temp_df, 4)
        temp_df = pd.DataFrame(temp_df.values - wn3_df.values, columns=temp_df.columns)
        wn4_df = _fit_single_harmonic(temp_df, 3)
        temp_df = pd.DataFrame(temp_df.values - wn4_df.values, columns=temp_df.columns)
        wn5_df = _fit_single_harmonic(temp_df, 2.4)
        temp_df = pd.DataFrame(temp_df.values - wn5_df.values, columns=temp_df.columns)

        # ------background wind = wn0 + wn1 + wn2 + wn3 + wn4 + wn5---------------------------
        background = (wn5_df.values + wn4_df.values + wn3_df.values
                      + wn2_df.values + wn1_df.values)
        # wn0 is a single column; add it row by row, skipping NaN and zero entries
        for i in range(21):
            wn0_value = wn0_df.iloc[i]
            if not np.isnan(wn0_value) and wn0_value != 0:
                background[i, :] += wn0_value

        # What is left after removing wn0-wn5 is treated as the perturbation field
        perturbation = temp_df

        # ---------FFT band-pass filtering------------------------------------------------------
        result = pd.DataFrame(np.nan, index=perturbation.index, columns=perturbation.columns)
        # Vertical-wavelength band to keep
        lambda_low = 2    # 2 km
        lambda_high = 15  # 15 km
        f_low = 2 * np.pi / lambda_high
        f_high = 2 * np.pi / lambda_low

        # Filter each longitude-bin profile separately
        for col in perturbation.columns:
            x = perturbation[col]
            valid_values = x.dropna()
            N = len(valid_values)  # number of valid samples
            # Height of the first valid sample
            first_valid_index = valid_values.index[0] if not valid_values.index.empty else None
            height_value = height_df.loc[first_valid_index] if first_valid_index is not None else None
            # Skip the column if it holds no valid data
            if N == 0 or height_value is None:
                continue

            # Height grid and frequency axis
            dt = 0.25
            n = np.arange(N)
            t = height_value.values + n * dt
            f = n / (N * dt)

            # Forward FFT, zero the frequencies outside the pass band, inverse FFT
            y = np.fft.fft(valid_values.values)
            yy = y.copy()
            freq_filter = (f < f_low) | (f > f_high)
            yy[freq_filter] = 0
            perturbation_after = np.real(np.fft.ifft(yy))

            # Write the filtered perturbation back at the original positions
            result.loc[valid_values.index, col] = perturbation_after

        # Squared perturbation, averaged over the longitude bins
        squared = result ** 2
        return squared.mean(axis=1)

    except FileNotFoundError:
        # Missing input file: return an all-NaN profile of the expected length
        expected_length = 21
        return pd.Series(np.nan, index=range(expected_length))


# -----vzonal------------------------------------------------------------------------------
def process_vzonal_day(day, year=2015):
    """Squared zonal-wind perturbation profile (u'^2) for one day."""
    return _process_day(day, year, 'vzonal')


# --------meridional-------------------------------------------------------------------------
def process_vmeridional_day(day, year=2015):
    """Squared meridional-wind perturbation profile (v'^2) for one day."""
    return _process_day(day, year, 'vmeridional')


days_in_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]


# Convert a day of year to its English month name (non-leap-year calendar)
def day_to_month(day):
    cumulative_days = 0
    month_names = ["January", "February", "March", "April", "May", "June",
                   "July", "August", "September", "October", "November", "December"]
    # Accumulate the days per month until the given day falls inside a month
    for i, days in enumerate(days_in_month):
        cumulative_days += days
        if day <= cumulative_days:
            return month_names[i]


class TidiGravityWPlotMonthly:
    def __init__(self, year):
        self.year = year
        cache_path = f"{DATA_BASEPATH.tidi}/cache"
        if os.path.exists(f"{cache_path}/{year}/all_days_vzonal_results.parquet") \
                and os.path.exists(f"{cache_path}/{year}/all_days_vmeridional_results.parquet"):
            all_days_vzonal_results = pd.read_parquet(
                f"{cache_path}/{year}/all_days_vzonal_results.parquet")
            all_days_vmeridional_results = pd.read_parquet(
                f"{cache_path}/{year}/all_days_vmeridional_results.parquet")
        else:
            # Empty DataFrame to collect the daily zonal results
            all_days_vzonal_results = pd.DataFrame()
            # Process each day and append the profile as a new column
            for day in range(1, 365):
                u2 = process_vzonal_day(day, year)
                all_days_vzonal_results[rf"{day:02d}"] = u2
            all_days_vzonal_results.columns = [f"{day:02d}" for day in range(1, 365)]

            # Empty DataFrame to collect the daily meridional results
            all_days_vmeridional_results = pd.DataFrame()
            for day in range(1, 365):
                v2 = process_vmeridional_day(day, year)
                all_days_vmeridional_results[rf"{day:02d}"] = v2
            all_days_vmeridional_results.columns = [f"{day:02d}" for day in range(1, 365)]

            # Cache the results; create the directory if it does not exist
            if not os.path.exists(f"{cache_path}/{year}"):
                os.makedirs(f"{cache_path}/{year}")
            all_days_vzonal_results.to_parquet(
                f"{cache_path}/{year}/all_days_vzonal_results.parquet")
            all_days_vmeridional_results.to_parquet(
                f"{cache_path}/{year}/all_days_vmeridional_results.parquet")

        self.all_days_vzonal_results = all_days_vzonal_results
        self.all_days_vmeridional_results = all_days_vmeridional_results

        # -------------------------------------------------------------------------------------
        # --------energy from the squared zonal and meridional winds---------------------------
        # Element-wise sum, kept only where both components are valid
        sum_df = np.where(
            pd.notna(all_days_vmeridional_results) & pd.notna(all_days_vzonal_results),
            all_days_vmeridional_results + all_days_vzonal_results,
            np.nan
        )
        HP = 1 / 2 * all_days_vmeridional_results + 1 / 2 * all_days_vzonal_results
        heights = [70.0, 72.5, 75.0, 77.5, 80.0, 82.5, 85.0, 87.5, 90.0, 92.5, 95.0,
                   97.5, 100.0, 102.5, 105.0, 107.5, 110.0, 112.5, 115.0, 117.5, 120.0]
        HP.index = heights
        # # Save the DataFrame to an Excel file
        # HP.to_excel('HP_data.xlsx')

        # ----------annual statistics: prepare the height-time data-----------------------------
        data = HP.reset_index()                 # move the height index into the first column
        h = data.iloc[:, 0].copy()              # heights, used for the y axis
        dates = list(range(1, data.shape[1]))   # days of year, used for the x axis
        data0 = data.iloc[:, 1:].copy()         # values to plot

        # Reverse h so that height increases from bottom to top
        self.h_reversed = h[::-1].reset_index(drop=True)
        data0_reversed = data0[::-1].reset_index(drop=True)
        # Mask values larger than 20 as NaN
        data0_reversed[data0_reversed > 20] = float('nan')

        self.data0_reversed = data0_reversed
        self.HP = HP
        self.dates = dates
        # Month label for every day of the 365-day year
        self.months = [day_to_month(day) for day in dates]

    def plot_height(self):
        h_reversed = self.h_reversed
        data0_reversed = self.data0_reversed
        dates = self.dates
        months = self.months

        # Plot settings
        plt.clf()
        plt.rcParams['font.family'] = 'SimHei'              # CJK-capable font
        plt.rcParams['font.size'] = 12                       # base font size
        plt.rcParams['axes.unicode_minus'] = False           # render minus signs correctly
        plt.rcParams['font.sans-serif'] = 'Times New Roman'
        plt.rcParams['axes.labelsize'] = 14                  # axis-label font size
        plt.rcParams['xtick.labelsize'] = 12                 # x-tick font size
        plt.rcParams['ytick.labelsize'] = 12                 # y-tick font size
        plt.rcParams['legend.fontsize'] = 16                 # legend font size

        plt.figure(figsize=(10, 6))
        # Heatmap of the height-time cross-section
        sns.heatmap(data0_reversed, annot=False, cmap='YlGnBu', linewidths=0.5,
                    yticklabels=h_reversed, xticklabels=months,
                    cbar_kws={'label': 'Gravitational potential energy'})
        # The x axis is long, so only draw evenly spaced tick labels
        interval = 34  # tick-label spacing along the x axis
        plt.xticks(ticks=range(0, len(dates), interval),
                   labels=months[::interval], rotation=45)
        plt.xlabel('Month')
        plt.ylabel('Height')
        # plt.show()

    def plot_energy(self):
        HP = self.HP
        # --------------monthly statistics plot--------------------------------------------------
        num_cols = HP.shape[1]
        # Average every block of 30 day-columns (the last block may be shorter)
        mean_cols = []
        start = 0
        while start < num_cols:
            end = min(start + 30, num_cols)
            subset = HP.iloc[:, start:end]
            # Row-wise mean of this block becomes one "monthly" column
            mean_cols.append(subset.mean(axis=1))
            start = end
        # Combine the block means into one DataFrame
        result_df = pd.concat(mean_cols, axis=1)

        # Natural logarithm of every element
        result_df_log = result_df.applymap(lambda x: np.log(x))
        # Drop the lowest height level (70 km)
        result_df_log.drop(70, axis=0, inplace=True)

        # Mean over the remaining heights for each monthly column
        monthly_average = result_df_log.mean(axis=0)
        # Keep exactly 12 values so the result can be reshaped to (1, 12)
        if len(monthly_average) > 12:
            monthly_average = monthly_average[:-1]
        monthly_average = monthly_average.values.reshape(1, 12)
        monthly_average = monthly_average.ravel()

        # Month labels for the x axis
        months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

        # Clear the previous plot and draw the monthly line plot
        plt.clf()
        plt.plot(months, monthly_average, marker='o', linestyle='-', color='b')
        plt.title("Monthly mean energy (natural log)")
        plt.xlabel("Month")
        plt.ylabel("Mean energy")
        plt.xticks(rotation=45)  # keep the month labels readable
        plt.grid(True)
        plt.tight_layout()
        # plt.show()
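

# Usage sketch (not part of the original script; the entry point and output file names
# below are assumptions). The original leaves plt.show() commented out, so this example
# saves the figures to PNG files instead.
if __name__ == "__main__":
    plotter = TidiGravityWPlotMonthly(2015)  # runs the daily processing or loads the cache
    plotter.plot_height()
    plt.savefig("tidi_2015_height_time_heatmap.png", dpi=300)  # hypothetical file name
    plotter.plot_energy()
    plt.savefig("tidi_2015_monthly_energy.png", dpi=300)       # hypothetical file name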