# zephyr-backend/balloon/read_data.py

import xarray as xr
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d


def round_to_nearest_multiple(value, multiple):
    """Snap ``value`` to the closest integer multiple of ``multiple``.

    The quotient ``value / multiple`` is rounded with the built-in ``round``
    (exact ties go to the even integer) and then scaled back by ``multiple``.
    """
    whole_steps = round(value / multiple)
    return whole_steps * multiple


def _resample_profile(profile, step):
    """Linearly interpolate every column of ``profile`` onto a regular ``alt`` grid.

    Args:
        profile: DataFrame with an ``alt`` column holding unique values plus
            the dependent columns to interpolate.
        step: Grid spacing, in the same units as ``alt``.

    Returns:
        A new DataFrame whose ``alt`` column holds consecutive multiples of
        ``step`` and whose remaining columns are the interpolated values.

    Raises:
        ValueError: If ``profile`` has fewer than two rows.
    """
    if len(profile) < 2:
        raise ValueError(
            "at least two distinct altitude levels are required for interpolation"
        )

    lowest = profile["alt"].min()
    highest = profile["alt"].max()

    # Keep the number of levels that np.arange(lowest, highest + step, step)
    # yields, but derive the levels from consecutive integer indices. Rounding
    # every sample independently (round-half-to-even) produced repeated and
    # skipped levels whenever ``lowest`` sat halfway between two multiples of
    # ``step`` (e.g. 0.025 with a 0.05 step).
    level_count = np.arange(lowest, highest + step, step).size
    first_index = int(round(float(lowest) / step))
    grid = (first_index + np.arange(level_count)) * step

    resampled = pd.DataFrame({"alt": grid})
    for column in profile.columns.drop("alt"):
        # The grid may reach slightly beyond the measured range at either end,
        # so values there are linearly extrapolated rather than rejected.
        interpolate = interp1d(
            profile["alt"], profile[column], kind="linear", fill_value="extrapolate"
        )
        resampled[column] = interpolate(grid)

    return resampled


def read_data(path, step=0.05):
    """Load a sounding dataset and resample it onto a regular altitude grid.

    The file is opened with xarray, flattened to a DataFrame, and rows with any
    missing value are dropped. ``alt`` is divided by 1000 and ``rh`` is
    multiplied by 100 (the original comments label the results as km and %;
    presumably the inputs are metres and a 0-1 fraction - verify against the
    data source). Rows that repeat an altitude are removed, keeping the first.

    Args:
        path: Path of the dataset file to read.
        step: Spacing of the output altitude grid, in the units of the scaled
            ``alt`` column. Defaults to 0.05.

    Returns:
        DataFrame with columns ``alt``, ``press``, ``temp``, ``rh``, ``u``,
        ``v`` and ``wspeed``; ``alt`` holds multiples of ``step`` and the other
        columns are linearly interpolated (extrapolated at the edges).

    Raises:
        ValueError: If ``step`` is not positive or fewer than two usable
            altitude levels remain after cleaning.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    # Close the dataset as well as the underlying file handle once the data
    # has been materialised into a DataFrame.
    with open(path, "rb") as f, xr.open_dataset(f) as dataset:
        df = dataset.to_dataframe().dropna(how="any")

    # Keep only the columns of interest.
    columns_to_extract = ["alt", "press", "temp", "rh", "u", "v", "wspeed"]
    profile = df[columns_to_extract].copy()

    # Unit conversion.
    profile["alt"] = profile["alt"] / 1000  # km
    profile["rh"] = profile["rh"] * 100  # %

    # Interpolation needs each altitude to appear only once.
    profile = profile.drop_duplicates(subset=["alt"])

    return _resample_profile(profile, step)