zephyr-backend/balloon/read_data.py
2025-01-15 14:48:48 +08:00

42 lines
1.5 KiB
Python

import xarray as xr
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
def round_to_nearest_multiple(value, multiple):
    """Snap ``value`` to the closest integer multiple of ``multiple``.

    The quotient ``value / multiple`` is rounded with the built-in ``round``
    (which also decides how exact ties are resolved) and then scaled back
    up by ``multiple``.
    """
    whole_steps = round(value / multiple)
    return whole_steps * multiple
def read_data(path, step=0.05):
    """Load a profile from ``path`` and resample it onto a regular altitude grid.

    The file is opened with ``xarray.open_dataset``, flattened to a
    DataFrame, and every row containing a missing value is dropped. ``alt``
    is divided by 1000 (to km) and ``rh`` is multiplied by 100 (to %).
    Rows with a repeated altitude are discarded (first occurrence kept), and
    the remaining columns are linearly interpolated onto altitudes that are
    integer multiples of ``step``.

    Args:
        path: Location of a dataset file that ``xarray.open_dataset`` can
            read from a binary file object.
        step: Spacing of the output altitude grid, in the converted (km)
            altitude units. Defaults to 0.05, the previously hard-coded value.

    Returns:
        A ``pandas.DataFrame`` with the columns ``alt``, ``press``, ``temp``,
        ``rh``, ``u``, ``v`` and ``wspeed``, one row per grid altitude. Grid
        points that fall just outside the measured altitude range are
        filled by linear extrapolation.

    Raises:
        ValueError: If ``step`` is not a positive number, or if fewer than
            two distinct altitude levels remain after cleaning.
        KeyError: If the dataset lacks one of the required columns.
    """
    if not step > 0:
        raise ValueError(f"step must be a positive number, got {step!r}")

    # Manage the xarray dataset with the same ``with`` so its handle is
    # released together with the raw file once the data has been loaded.
    with open(path, "rb") as f, xr.open_dataset(f) as dataset:
        df = dataset.to_dataframe().dropna(how="any")

    value_columns = ["press", "temp", "rh", "u", "v", "wspeed"]
    profile = df[["alt", *value_columns]].copy()

    # Unit conversions.
    profile["alt"] = profile["alt"] / 1000  # km
    profile["rh"] = profile["rh"] * 100  # %

    # Interpolation needs strictly distinct altitude values.
    profile = profile.drop_duplicates(subset=["alt"])
    if len(profile) < 2:
        raise ValueError(
            "at least two distinct altitude levels are required for "
            f"interpolation, got {len(profile)}"
        )

    lowest = profile["alt"].min()
    highest = profile["alt"].max()

    # Same number of levels as a float grid running from the lowest to the
    # highest altitude in increments of ``step``.
    n_levels = np.arange(lowest, highest + step, step).size

    # Round only the first level to an integer multiple of ``step`` and count
    # upwards from it. Rounding every float grid point independently can
    # produce duplicated or skipped levels when the lowest altitude lies
    # exactly halfway between two multiples (ties round half-to-even).
    first_index = int(round(lowest / step))
    grid = (first_index + np.arange(n_levels)) * step

    interpolated_data = pd.DataFrame({"alt": grid})

    # Linearly interpolate every dependent variable onto the regular grid.
    for col in value_columns:
        interp_func = interp1d(
            profile["alt"],
            profile[col],
            kind="linear",
            fill_value="extrapolate",
        )
        interpolated_data[col] = interp_func(grid)

    return interpolated_data