# zephyr-backend/modules/cosmic/gravityw_multiday_process.py

import os
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
from scipy.optimize import curve_fit
import netCDF4 as nc
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.font_manager as fm
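
# Overview (descriptive summary of the routine below):
#   For one day of atmPrf radio-occultation temperature profiles this routine
#   1. reads Temp / MSL_alt / Lat / Lon from every *.0390_nc file and interpolates each
#      profile onto a common 3000-level vertical grid (<= 60 km),
#   2. averages temperature in 20-degree longitude bins within the requested latitude band,
#   3. removes the zonal mean (wavenumber 0) and iteratively fits and subtracts zonal
#      wavenumbers 1-5 to obtain a background temperature and a residual perturbation,
#   4. band-pass filters the residual in the vertical (2-15 km wavelengths) via FFT,
#   5. computes the Brunt-Vaisala frequency and gravity-wave potential energy per
#      longitude bin and returns their means, caching the result as .npy files.
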
def process_single_file(base_folder_path, i, lat_range=(30, 40)):
    # Build the path of the folder for day i (day number zero-padded to three digits)
    folder_name = f"atmPrf_repro2021_2008_{i:03d}"
    folder_path = os.path.join(base_folder_path, folder_name)
    # i is the day-of-year index
    cache_path_nz = f"{base_folder_path}/{folder_name}_mean_ktemp_Nz.npy"
    cache_path_ptz = f"{base_folder_path}/{folder_name}_mean_ktemp_Ptz.npy"
    # Reuse cached results if both files already exist
    if os.path.exists(cache_path_nz) and os.path.exists(cache_path_ptz):
        mean_ktemp_Nz = np.load(cache_path_nz)
        mean_ktemp_Ptz = np.load(cache_path_ptz)
        print(f"Loaded cache for {folder_name}")
        return mean_ktemp_Nz, mean_ktemp_Ptz
    # Check whether the folder exists
    if os.path.exists(folder_path):
        dfs = []
        # Iterate over the files in the folder
        for file_name in os.listdir(folder_path):
            if file_name.endswith('.0390_nc'):
                finfo = os.path.join(folder_path, file_name)
                try:
                    dataset = nc.Dataset(finfo, 'r')
                    # Extract the variables of interest
                    temp = dataset.variables['Temp'][:]
                    altitude = dataset.variables['MSL_alt'][:]
                    lat = dataset.variables['Lat'][:]
                    lon = dataset.variables['Lon'][:]
                    # Build a DataFrame for this profile
                    df = pd.DataFrame({
                        'Longitude': lon,
                        'Latitude': lat,
                        'Altitude': altitude,
                        'Temperature': temp
                    })
                    dataset.close()
                    # Drop rows above 60 km
                    df = df[df['Altitude'] <= 60]
                    # Interpolate this profile onto a common 3000-level altitude grid
                    alt_interp = np.linspace(
                        df['Altitude'].min(), df['Altitude'].max(), 3000)
                    f_lon = interp1d(
                        df['Altitude'], df['Longitude'], kind='linear', fill_value="extrapolate")
                    f_lat = interp1d(
                        df['Altitude'], df['Latitude'], kind='linear', fill_value="extrapolate")
                    f_temp = interp1d(
                        df['Altitude'], df['Temperature'], kind='linear', fill_value="extrapolate")
                    # Evaluate the interpolants on the common grid
                    interpolated_lon = f_lon(alt_interp)
                    interpolated_lat = f_lat(alt_interp)
                    interpolated_temp = f_temp(alt_interp)
                    # Assemble the interpolated profile
                    interpolated_df = pd.DataFrame({
                        'Altitude': alt_interp,
                        'Longitude': interpolated_lon,
                        'Latitude': interpolated_lat,
                        'Temperature': interpolated_temp
                    })
                    # Collect the interpolated profile
                    dfs.append(interpolated_df)
                except Exception as e:
                    print(f"Error while processing file {finfo}: {e}")
        # Concatenate all interpolated profiles row-wise
        final_df = pd.concat(dfs, axis=0, ignore_index=True)
        # Replace the Altitude column with a per-profile level index 0..2999
        num_rows = len(final_df)
        altitude_values = np.tile(
            np.arange(3000), num_rows // 3000 + 1)[:num_rows]
        final_df['Altitude'] = altitude_values
        # Convert temperature from Celsius to Kelvin
        final_df['Temperature'] = final_df['Temperature'] + 273.15
        lat_begin, lat_end = lat_range
        # Keep only data inside the requested latitude band (default 30-40 degrees)
        latitude_filtered_df = final_df[(
            final_df['Latitude'] >= lat_begin) & (final_df['Latitude'] <= lat_end)].copy()
        # Divide the data into 20-degree longitude bins
        lon_min = latitude_filtered_df['Longitude'].min()
        lon_max = latitude_filtered_df['Longitude'].max()
        lon_bins = np.arange(lon_min, lon_max + 20, 20)  # longitude bin edges
        # Assign each sample to its longitude bin
        latitude_filtered_df.loc[:, 'Longitude_Grid'] = np.digitize(
            latitude_filtered_df['Longitude'], lon_bins) - 1
        # Zonal-mean (wavenumber 0) temperature at each altitude, ignoring NaN
        altitude_temperature_mean = latitude_filtered_df.groupby(
            'Altitude')['Temperature'].mean().reset_index()
        altitude_temperature_mean.columns = ['Altitude', 'Mean_Temperature']
        # Altitude index range (the common 3000-level vertical grid)
        altitude_range = range(0, 3000)
        all_heights_mean_temperature = []  # per-altitude, per-longitude-bin means
        for altitude in altitude_range:
            # Select all samples at this altitude index
            altitude_df = latitude_filtered_df[latitude_filtered_df['Altitude'] == altitude]
            # Mean temperature within each longitude bin
            temperature_mean_by_grid = altitude_df.groupby(
                'Longitude_Grid')['Temperature'].mean().reset_index()
            temperature_mean_by_grid.columns = [
                'Longitude_Grid', 'Mean_Temperature']
            # Record the altitude so results from different levels can be told apart later
            temperature_mean_by_grid['Altitude'] = altitude
            all_heights_mean_temperature.append(temperature_mean_by_grid)
        # Combine the per-altitude results into a single DataFrame
        combined_mean_temperature_df = pd.concat(
            all_heights_mean_temperature, ignore_index=True)
        # Join with the zonal means on Altitude, keeping only matching rows
        merged_df = pd.merge(combined_mean_temperature_df,
                             altitude_temperature_mean, on='Altitude', how='inner')
        # Perturbation about the zonal mean (wavenumber-0 component removed)
        merged_df['Temperature_Difference'] = merged_df['Mean_Temperature_x'] - \
            merged_df['Mean_Temperature_y']
        # Group by altitude index for the wavenumber fits
        grouped = merged_df.groupby('Altitude')

        def single_harmonic(x, A, phi):
            # Sine wave with period 18 / k longitude bins; k is picked up from the
            # enclosing scope at call time (set in the wavenumber loop below)
            return A * np.sin(2 * np.pi / (18 / k) * x + phi)
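        # With 20-degree longitude bins, 18 bins span the full 360 degrees, so a period of
        # 18 / k bins corresponds to zonal wavenumber k. This implicitly assumes the day's
        # profiles cover (close to) all 18 bins; with partial longitude coverage the fitted
        # "wavenumber" is only approximate.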
        # Per-altitude fit parameters, fitted curves, residuals and background temperature
        fit_results = {}
        fitted_curves = {}
        residuals = {}
        background_temperatures = {}
        for altitude, group in grouped:
            y_data = group['Temperature_Difference'].values
            x_data = np.arange(len(y_data))
            wn0_data = group['Mean_Temperature_y'].values  # zonal mean (wn0) at this altitude
            # If Temperature_Difference is all NaN, store NaN placeholders
            if np.all(np.isnan(y_data)):
                fit_results[altitude] = {
                    'A': [np.nan] * 5, 'phi': [np.nan] * 5}
                fitted_curves[altitude] = [np.nan * x_data] * 5
                residuals[altitude] = np.nan * x_data
                background_temperatures[altitude] = np.nan * x_data
            else:
                # Replace NaN values with the mean of the valid values
                y_data = np.where(np.isnan(y_data), np.nanmean(y_data), y_data)
                wn_params = []  # fitted (A, phi) per wavenumber
                wn_curves = []  # fitted curve per wavenumber
                # Wavenumber 0 is the zonal mean itself
                wn0 = wn0_data
                # Fit and subtract wavenumbers 1 to 5 in turn
                for k in range(1, 6):
                    # single_harmonic reads the current k via the enclosing scope
                    popt, pcov = curve_fit(single_harmonic, x_data, y_data, p0=[
                        np.nanmax(y_data) - np.nanmin(y_data), 0])
                    A_fit, phi_fit = popt
                    wn_params.append({'A': A_fit, 'phi': phi_fit})
                    # Evaluate the fitted harmonic
                    WN = single_harmonic(x_data, A_fit, phi_fit)
                    wn_curves.append(WN)
                    # Subtract it; the residual becomes the target of the next fit
                    y_data = y_data - WN
                # Store the results for this altitude
                fit_results[altitude] = wn_params
                fitted_curves[altitude] = wn_curves
                residuals[altitude] = y_data
                # Background temperature at this altitude: wn0 + wn1 + ... + wn5
                wn_sum = np.sum([wn0] + wn_curves, axis=0)
                background_temperatures[altitude] = wn_sum
        # One column per altitude: the residual perturbation in each longitude bin
        residual_df = pd.DataFrame(residuals)
        # Forward-fill NaNs along each row (use the previous valid value)
        residual_df.ffill(axis=1, inplace=True)
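        # In residual_df each column corresponds to one altitude index and each row to one
        # longitude bin, so every row yielded by iterrows() below is a vertical perturbation
        # profile; the FFT band-pass that follows therefore filters in the vertical.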
        # Band-pass filter each vertical residual profile via FFT
        result = {}
        # Vertical-wavelength band to retain
        lambda_low = 2    # 2 km
        lambda_high = 15  # 15 km
        f_low = 2 * np.pi / lambda_high
        f_high = 2 * np.pi / lambda_low
        # Process one row (one longitude bin, i.e. one vertical profile) at a time
        for idx, residuals_array in residual_df.iterrows():
            # Mask out NaNs and keep only the valid values
            valid_values = np.ma.masked_array(
                residuals_array, np.isnan(residuals_array))
            compressed_values = valid_values.compressed()  # NaNs removed
            N = len(compressed_values)  # number of valid samples
            # If every value is NaN, the filtered profile is NaN as well
            if N == 0:
                result[idx] = np.full_like(residuals_array, np.nan)
            else:
                # Sample spacing and frequency grid
                dt = 0.02  # vertical sampling interval in km (3000 levels over ~60 km)
                n = np.arange(N)
                f = n / (N * dt)
                # Fourier transform of the profile
                y = np.fft.fft(compressed_values)
                # Zero out frequencies outside the retained band
                yy = y.copy()
                freq_filter = (f >= f_low) & (f <= f_high)
                yy[~freq_filter] = 0
                # Inverse transform gives the band-pass-filtered perturbation
                perturbation_after = np.real(np.fft.ifft(yy))
                # Store the filtered profile for this longitude bin
                result[idx] = perturbation_after
        # Reshape the background and perturbation temperatures into matrices
        heights = list(background_temperatures.keys())
        data_length = len(next(iter(background_temperatures.values())))
        background_matrix = np.zeros((data_length, len(heights)))
        for idx, height in enumerate(heights):
            background_matrix[:, idx] = background_temperatures[height]
        heights = list(result.keys())
        data_length = len(next(iter(result.values())))
        perturbation_matrix = np.zeros((data_length, len(heights)))
        for idx, height in enumerate(heights):
            perturbation_matrix[:, idx] = result[height]
        # Transpose so both matrices are (longitude bins x altitude levels)
        perturbation_matrix = perturbation_matrix.T
        # Compute the Brunt-Väisälä frequency and potential energy
        heights_for_calc = np.linspace(0, 60, 3000) * 1000  # altitude grid in metres

        def brunt_vaisala_frequency(g, BT_z, c_p, heights):
            # Vertical gradient of the background temperature
            dBT_z_dz = np.gradient(BT_z, heights)
            # Brunt-Väisälä frequency; take the absolute value before the square root
            frequency_squared = (g / BT_z) * ((g / c_p) + dBT_z_dz)
            frequency = np.sqrt(np.abs(frequency_squared))
            return frequency

        def calculate_gravitational_potential_energy(g, BT_z, N_z, PT_z):
            # Gravity-wave potential energy per unit mass
            return 0.5 * ((g / N_z) ** 2) * ((PT_z / BT_z) ** 2)
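        # Formulas implemented above (temperature-based form):
        #   N^2(z) = (g / T_bg) * (g / c_p + dT_bg/dz),  N = sqrt(|N^2|)
        #   Ep(z)  = 0.5 * (g / N)^2 * (T' / T_bg)^2
        # where T_bg is the background (wavenumber 0-5) temperature and T' the
        # band-pass-filtered perturbation.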
        g = 9.81      # gravitational acceleration (m s^-2)
        c_p = 1004.5  # specific heat of dry air at constant pressure (J kg^-1 K^-1)
        N_z_matrix = []
        PT_z_matrix = []
        # One Brunt-Väisälä / potential-energy profile per longitude bin
        for row in range(background_matrix.shape[0]):
            BT_z = np.array(background_matrix[row])
            PT_z = np.array(perturbation_matrix[row])
            N_z = brunt_vaisala_frequency(g, BT_z, c_p, heights_for_calc)
            PW = calculate_gravitational_potential_energy(g, BT_z, N_z, PT_z)
            N_z_matrix.append(N_z)
            PT_z_matrix.append(PW)
        ktemp_Nz = np.vstack(N_z_matrix)
        ktemp_Ptz = np.vstack(PT_z_matrix)
        # Average over longitude bins to get one profile of each quantity for this day
        mean_ktemp_Nz = np.mean(ktemp_Nz, axis=0)
        mean_ktemp_Ptz = np.mean(ktemp_Ptz, axis=0)
        # Save to cache so later runs can skip reprocessing
        np.save(cache_path_nz, mean_ktemp_Nz)
        np.save(cache_path_ptz, mean_ktemp_Ptz)
        return mean_ktemp_Nz, mean_ktemp_Ptz
    else:
        print(f"Folder {folder_path} does not exist.")
        return None, None
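

# Minimal usage sketch (not part of the original routine): the base folder path and the
# day range below are hypothetical placeholders; adjust them to the local data layout.
if __name__ == "__main__":
    base = "/path/to/cosmic/atmPrf_2008"  # hypothetical location of the daily folders
    daily_nz, daily_ep = [], []
    for day in range(152, 156):  # example: a few consecutive days of year
        nz, ep = process_single_file(base, day, lat_range=(30, 40))
        if nz is not None:
            daily_nz.append(nz)
            daily_ep.append(ep)
    if daily_ep:
        # Multi-day mean potential-energy profile on the common vertical grid
        mean_ep = np.mean(np.vstack(daily_ep), axis=0)
        print(mean_ep.shape)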