使用 pyAerial 通过仿真生成数据#

此笔记本生成完全符合 5G NR 标准的 PUSCH/PDSCH 数据集:通过 pyAerial 中的 Python 绑定调用 NVIDIA cuPHY 来生成 PUSCH/PDSCH 时隙,并使用 NVIDIA Sionna 进行无线信道建模。生成的 PUSCH/PDSCH 时隙会经过不同的无线信道进行传输。通常,为了使模型尽可能具有通用性,最好使用尽可能多种不同的信道模型来训练模型。此笔记本能够生成这样的数据集:其中的样本使用多种不同的信道模型(例如 3GPP 使用的信道模型)以及不同的 MCS 等级和其他传输参数生成。

[1]:
# Platform guard: this notebook only runs on x86_64 hosts, so stop right away elsewhere.
import platform

host_arch = platform.machine()
if host_arch != 'x86_64':
    raise SystemExit("Unsupported platform!")

导入#

[2]:
import warnings
warnings.filterwarnings('ignore')

import itertools
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "3"  # Silence TensorFlow.

import numpy as np
import pandas as pd
import sionna
import tensorflow as tf
from tqdm.notebook import tqdm

from aerial.phy5g.pdsch import PdschTx
from aerial.phy5g.ldpc.util import get_mcs, random_tb
from aerial.util.fapi import dmrs_bit_array_to_fapi
from aerial.util.data import PuschRecord
from aerial.util.data import save_pickle

# This is for Sionna and pyAerial to coexist on the same GPU:
# Configure the notebook to use only a single GPU and allocate only as much memory as needed.
# For more details, see https://www.tensorflow.org/guide/gpu.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
    except RuntimeError as e:
        # Per the TensorFlow GPU guide, memory growth must be set before the GPU
        # has been initialized; this is hit e.g. when the cell is re-run in a live kernel.
        print(e)
else:
    # Avoid an opaque IndexError on gpus[0] when TensorFlow sees no GPU at all.
    print("No GPU visible to TensorFlow, skipping the memory growth configuration.")

数据集生成参数#

用于生成数据集的参数在此处修改。请注意,某些参数以列表形式给出,这意味着可以为这些参数提供多个值。通常,人们希望训练数据集尽可能多样化,以便使模型能够很好地泛化到各种信道条件和不同的传输参数。

[3]:
# Target directory for the generated dataset. It gets created if it does not exist.
dataset_dir = 'data/example_simulated_dataset/QPSK'
os.makedirs(dataset_dir, exist_ok=True)

# Total number of samples. It is split evenly between all combinations of the
# list-valued options below using integer division, so the actual total may end
# up slightly below this number.
num_samples = 12000

# A list of channel models to include in the dataset. Suitable values:
# "Rayleigh" - Rayleigh block fading channel model (sionna.channel.RayleighBlockFading)
# "CDL-x", where x is one of ["A", "B", "C", "D", "E"] - for 3GPP CDL channel models
#          as per TR 38.901.
channel_models = ["CDL-D"]

# Speeds to include in the dataset.
# This is UE speed in m/s. The direction of travel will be chosen randomly within the x-y plane.
speeds = [0.8333]

# Delay spreads to include in the dataset.
# This is the nominal delay spread in [s]. Please see the CDL documentation
# about how to choose this value.
delay_spreads = [100e-9]

# A list of MCS indices (as per TS 38.214) to include in the dataset.
# MCS table value refers to TS 38.214 as follows:
# 1: TS38.214, table 5.1.3.1-1.
# 2: TS38.214, table 5.1.3.1-2.
# 3: TS38.214, table 5.1.3.1-3.
mcss = [1] # 1, 10, 19 used for QPSK, 16QAM and 64QAM, respectively.
mcs_table = 2

# Es/No values (in dB) to include in the dataset. The commented-out lists are
# alternative ranges to use together with the other MCS indices mentioned above.
# esnos = [9.0, 9.25, 9.5, 9.75, 10.0, 10.25, 10.5, 10.75, 11.0] # MCS 19
# esnos = [-0.5, -0.25, 0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]  # MCS 10
esnos = [-7.75, -7.5, -7.25, -7.0, -6.75, -6.5]  # MCS 1

# These are fixed for the dataset (single values, not swept over).
num_tx_ant = 1  # Number of transmit antennas.
num_rx_ant = 4  # Number of receive antennas.
cell_id = 41
carrier_frequency = 3.5e9  # Carrier frequency in Hz.
link_direction = "uplink"
layers = 1  # Number of layers (transmission rank).
rnti = 20001  # UE RNTI.
scid = 0  # DMRS scrambling ID.
data_scid = 41  # Data scrambling ID.
dmrs_port = 1  # DMRS port to be used.
# One binary entry per OFDM symbol of the slot; a 1 marks a DMRS symbol.
dmrs_position = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
start_sym = 0  # Start symbol index.
num_symbols = 14  # Number of symbols.
start_prb = 0
num_prbs = 273

# Numerology and frame structure. See TS 38.211.
# These values parameterize the Sionna resource grid; mu is also stored in the
# generated records as the subcarrier spacing index.
mu = 1
num_ofdm_symbols = 14
fft_size = 4096
cyclic_prefix_length = 288
subcarrier_spacing = 30e3  # In Hz.
# Guard subcarriers on the two edges of the grid: 4096 - 2 * 410 = 3276 = 12 * 273 remain.
num_guard_subcarriers = (410, 410)
num_slots_per_frame = 20

信道生成#

无线信道生成使用 NVIDIA Sionna 完成。

[4]:
class Channel(sionna.channel.OFDMChannel):
    """Radio channel with AWGN built on top of ``sionna.channel.OFDMChannel``.

    The underlying channel model is either Rayleigh block fading or one of the
    3GPP TR 38.901 CDL models. The object is called with a rank-3 Tx tensor and
    a noise variance, and returns the received rank-3 tensor with the guard
    subcarriers removed.

    Args:
        link_direction: Link direction passed to the CDL model, e.g. "uplink".
        channel_model: "Rayleigh" or "CDL-x"; the last character selects the CDL profile.
        num_tx_ant: Number of transmit antennas. Only used by the Rayleigh model.
        num_rx_ant: Number of receive antennas. For CDL this must be a positive
            even number, as the receive array is made of dual-polarized columns.
        carrier_frequency: Carrier frequency in Hz.
        delay_spread: Nominal delay spread in seconds (CDL only).
        speed: UE speed in m/s, passed to the CDL model as ``min_speed``.
        resource_grid: The ``sionna.ofdm.ResourceGrid`` describing the OFDM grid.

    Raises:
        ValueError: If the channel model name is not recognized, or if a CDL
            model is requested with an odd or non-positive ``num_rx_ant``.
    """

    def __init__(self,
                 link_direction,
                 channel_model,
                 num_tx_ant,
                 num_rx_ant,
                 carrier_frequency,
                 delay_spread,
                 speed,
                 resource_grid):
        self.resource_grid = resource_grid
        self.resource_grid_mapper = sionna.ofdm.ResourceGridMapper(resource_grid)
        self.remove_guard_subcarriers = sionna.ofdm.RemoveNulledSubcarriers(resource_grid)

        if channel_model == "Rayleigh":
            ch_model = sionna.channel.RayleighBlockFading(
                num_rx=1,
                num_rx_ant=num_rx_ant,
                num_tx=1,
                num_tx_ant=num_tx_ant
            )

        elif "CDL" in channel_model:
            # The receive array consists of dual-polarized columns, i.e. two
            # elements per column. Reject antenna counts that cannot be
            # represented instead of silently building a smaller array.
            if num_rx_ant < 2 or num_rx_ant % 2 != 0:
                raise ValueError(
                    f"CDL channel models require an even, positive num_rx_ant, got {num_rx_ant}!"
                )

            # Define the antenna arrays. These are only needed by the CDL models.
            # NOTE(review): the UE side is always a single antenna here, i.e.
            # num_tx_ant is not used for CDL - confirm this is intended.
            ue_array = sionna.channel.tr38901.Antenna(
                polarization="single",
                polarization_type="V",
                antenna_pattern="38.901",
                carrier_frequency=carrier_frequency
            )
            gnb_array = sionna.channel.tr38901.AntennaArray(
                num_rows=1,
                num_cols=int(num_rx_ant) // 2,
                polarization="dual",
                polarization_type="cross",
                antenna_pattern="38.901",
                carrier_frequency=carrier_frequency
            )

            # The CDL profile is the last character of the name, e.g. "D" in "CDL-D".
            cdl_model = channel_model[-1]

            # Configure a channel impulse response (CIR) generator for the CDL model.
            ch_model = sionna.channel.tr38901.CDL(
                cdl_model,
                delay_spread,
                carrier_frequency,
                ue_array,
                gnb_array,
                link_direction,
                min_speed=speed
            )
        else:
            raise ValueError(f"Invalid channel model {channel_model}!")

        super().__init__(
            ch_model,
            resource_grid,
            add_awgn=True,
            normalize_channel=True,
            return_channel=False
        )

    def __call__(self, tx_tensor, No):
        """Pass one Tx tensor through the channel and add noise.

        Args:
            tx_tensor: Rank-3 Tx tensor. Presumably laid out as
                (subcarriers, symbols, Tx antennas) - TODO confirm against
                the transmitter output.
            No: Noise variance, handed to the parent ``OFDMChannel`` together
                with the mapped Tx signal.

        Returns:
            The received rank-3 tensor, with the axes put back in the same
            order as in the input and the guard subcarriers removed.
        """
        # Add batch and num_tx dimensions that Sionna expects and reshape.
        # NOTE(review): the reshape flattens everything into a single stream,
        # which looks like it assumes one Tx antenna - confirm.
        tx_tensor = tf.transpose(tx_tensor, (2, 1, 0))
        tx_tensor = tf.reshape(tx_tensor, (1, -1))[None, None]
        tx_tensor = self.resource_grid_mapper(tx_tensor)
        rx_tensor = super().__call__((tx_tensor, No))
        rx_tensor = self.remove_guard_subcarriers(rx_tensor)
        # Drop the two leading dimensions and undo the initial transpose.
        rx_tensor = rx_tensor[0, 0]
        rx_tensor = tf.transpose(rx_tensor, (2, 1, 0))
        return rx_tensor

# OFDM resource grid handed to every Channel instance. It describes a single
# transmitter with a single stream, and no pilots are configured in the grid.
resource_grid = sionna.ofdm.ResourceGrid(
    # Grid dimensions and numerology.
    fft_size=fft_size,
    num_ofdm_symbols=num_ofdm_symbols,
    subcarrier_spacing=subcarrier_spacing,
    cyclic_prefix_length=cyclic_prefix_length,
    # Nulled subcarriers.
    num_guard_carriers=num_guard_subcarriers,
    dc_null=False,
    # Transmitter / stream layout.
    num_tx=1,
    num_streams_per_tx=1,
    # Pilots.
    pilot_pattern=None,
    pilot_ofdm_symbol_indices=None
)

PDSCH 发射器#

这将创建 PDSCH 发射器。然而,由于 5G NR 中 PDSCH 和 PUSCH 的对称性,它也可以通过特定的参数配置来生成 PUSCH 帧。在本笔记本中,它被用作 PUSCH 发射器来生成上行链路时隙。

[5]:
# PDSCH transmitter object, used in this notebook to generate the PUSCH slots.
# NOTE(review): num_rx_ant is set from num_tx_ant rather than from num_rx_ant.
# Presumably intentional, as only the transmit side is modeled here - confirm.
pxsch_tx = PdschTx(
    num_tx_ant=num_tx_ant,
    num_rx_ant=num_tx_ant,
    cell_id=cell_id,
)

数据集生成#

实际的数据集生成在此处完成。代码会遍历不同的信道、SNR 和 MCS 参数,并为每种参数组合生成一定数量的样本,使得样本总数接近期望的数量。

上面创建的 PxSCH 发射器用于生成 Tx 帧,随后该 Tx 帧被送入由 Sionna 生成的无线信道。生成的数据记录在 Parquet 文件中,其中包含大致遵循 Small Cell Forum FAPI 规范格式的 PUSCH 记录。

[6]:
# Number of parameter combinations, and number of samples generated for each of them.
num_cases = len(channel_models) * len(esnos) * len(speeds) * len(delay_spreads) * len(mcss)
num_samples_per_param = num_samples // num_cases
if num_samples_per_param == 0:
    # Without this check the loops below would run zero times and an empty
    # dataset would get written without any indication of the problem.
    raise ValueError(
        f"num_samples ({num_samples}) is smaller than the number of parameter "
        f"combinations ({num_cases}), no samples would be generated!"
    )

# Loop different channel models, Es/No values, speeds, delay spreads and MCS levels.
pusch_records = []
param_combinations = itertools.product(channel_models, esnos, speeds, delay_spreads, mcss)
for (channel_model, esno, speed, delay_spread, mcs) in \
        (pbar := tqdm(param_combinations, total=num_cases)):

    status_str = f"Generating... ({channel_model} | {esno} dB | {speed} m/s | {delay_spread} s | MCS {mcs})"
    pbar.set_description(status_str)

    # Create the channel model.
    channel = Channel(
        link_direction=link_direction,
        channel_model=channel_model,
        num_tx_ant=num_tx_ant,
        num_rx_ant=num_rx_ant,
        carrier_frequency=carrier_frequency,
        delay_spread=delay_spread,
        speed=speed,
        resource_grid=resource_grid
    )

    # The quantities below only depend on the parameter combination, not on
    # the individual sample, so they are computed once per combination.

    # Get modulation order and coderate.
    mod_order, coderate = get_mcs(mcs, mcs_table)

    # Noise power corresponding to the Es/No given in dB.
    No = pow(10., -esno / 10.)

    # Common part of the file names identifying the parameter combination.
    file_stem = f"{channel_model}_esno{esno}_speed{speed}_ds{delay_spread}_mcs{mcs}"

    for sample in range(num_samples_per_param):
        # Slot and frame numbers advance with the sample index.
        slot_number = sample % num_slots_per_frame
        # SFN takes values 0..1023, hence it wraps around modulo 1024.
        frame_number = (sample // num_slots_per_frame) % 1024

        # Generate a random transport block.
        tb_input = random_tb(mod_order, coderate, dmrs_position, num_prbs, start_sym, num_symbols, layers)

        # Transmit PxSCH. This is where we set the dynamically changing parameters.
        # Input parameters are given as lists as the interface supports multiple UEs.
        # NOTE(review): start_prb and num_prbs are not passed on here, and the
        # record below hard-codes rbStart=0 / rbSize=273. Confirm which PRB
        # allocation run() uses before changing those parameters.
        tx_tensor = pxsch_tx.run(
            tb_inputs=[tb_input],         # Input transport block in bytes.
            num_ues=1,                    # We simulate only one UE here.
            slot=slot_number,             # Slot number.
            dmrs_syms=dmrs_position,      # List of binary numbers indicating which symbols are DMRS.
            start_sym=start_sym,          # Start symbol index.
            num_symbols=num_symbols,      # Number of symbols.
            scids=[scid],                 # DMRS scrambling ID.
            layers=[layers],              # Number of layers (transmission rank).
            dmrs_ports=[dmrs_port],       # DMRS port(s) to be used.
            rntis=[rnti],                 # UE RNTI.
            data_scids=[data_scid],       # Data scrambling ID.
            code_rates=[coderate],        # Code rate
            mod_orders=[mod_order]        # Modulation order
        )[0]

        # Channel transmission and noise.
        rx_tensor = channel(tx_tensor, No)
        rx_tensor = np.array(rx_tensor)

        # Save the sample.
        rx_iq_data_filename = f"rx_iq_{file_stem}_{sample}.pkl"
        rx_iq_data_fullpath = os.path.join(dataset_dir, rx_iq_data_filename)
        save_pickle(data=rx_tensor, filename=rx_iq_data_fullpath)

        # Save noise power and SNR data as user data.
        user_data_filename = f"user_data_{file_stem}_{sample}.pkl"
        user_data_fullpath = os.path.join(dataset_dir, user_data_filename)
        user_data = dict(
            snr=esno,
            noise_var=No
        )
        save_pickle(data=user_data, filename=user_data_fullpath)

        pusch_record = PuschRecord(
            # SCF FAPI 10.02 UL_TTI.request message parameters:
            pduIdx=0,
            SFN=frame_number,
            Slot=slot_number,
            nPDUs=1,
            RachPresent=0,
            nULSCH=1,
            nULCCH=0,
            nGroup=1,
            PDUSize=0,
            pduBitmap=1,
            RNTI=rnti,
            Handle=0,
            BWPSize=273,
            BWPStart=0,
            SubcarrierSpacing=mu,
            CyclicPrefix=0,
            targetCodeRate=coderate * 10,
            qamModOrder=mod_order,
            mcsIndex=mcs,
            mcsTable=mcs_table - 1,  # Different indexing
            TransformPrecoding=1,  # Disabled.
            dataScramblingId=data_scid,
            nrOfLayers=layers,  # Same value as given to the transmitter above.
            ulDmrsSymbPos=dmrs_bit_array_to_fapi(dmrs_position),
            dmrsConfigType=0,
            ulDmrsScramblingId=cell_id,
            puschIdentity=cell_id,
            SCID=scid,
            numDmrsCdmGrpsNoData=2,
            dmrsPorts=1,  # Note that FAPI uses a different format compared to cuPHY.
            resourceAlloc=1,
            rbBitmap=np.array(36 * [0]),
            rbStart=0,
            rbSize=273,
            VRBtoPRBMapping=0,
            FrequencyHopping=0,
            txDirectCurrentLocation=0,
            uplinkFrequencyShift7p5khz=0,
            StartSymbolIndex=start_sym,
            NrOfSymbols=num_symbols,
            puschData=None,
            puschUci=None,
            puschPtrs=None,
            dftsOfdm=None,
            Beamforming=None,

            # SCF FAPI 10.02 RxData.indication message parameters:
            HarqID=0,
            PDULen=len(tb_input),
            UL_CQI=255,  # Set to invalid 0xFF.
            TimingAdvance=0,
            RSSI=65535,  # Set to invalid 0xFFFF.
            macPdu=tb_input,

            TbCrcStatus=0,
            NumCb=0,
            CbCrcStatus=None,

            rx_iq_data_filename=rx_iq_data_filename,
            user_data_filename=user_data_filename,

            errInd=""
        )
        pusch_records.append(pusch_record)

# Write the metadata of all generated samples into a single Parquet file.
print("Saving...")
df_filename = os.path.join(dataset_dir, "l2_metadata.parquet")
df = pd.DataFrame.from_records(pusch_records, columns=PuschRecord._fields)
df.to_parquet(df_filename, engine="pyarrow")
print("All done!")
Saving...
All done!