nvCOMP Python API Basics#

[1]:
import numpy as np
import cupy as cp

Download the sample files

[2]:
import urllib.request
urllib.request.urlretrieve("http://textfiles.com/etext/NONFICTION/locke-essay-113.txt", "locke-essay-113.txt")
urllib.request.urlretrieve("http://textfiles.com/etext/FICTION/mobydick.txt", "mobydick.txt")
[2]:
('mobydick.txt', <http.client.HTTPMessage at 0x7f9022b42ef0>)
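
textfiles.com can occasionally be slow or unreachable; the variant below (a minimal sketch, not part of the original run) skips files that already exist locally and reports download failures instead of raising.

[ ]:
import os
import urllib.error

for name, url in [("locke-essay-113.txt", "http://textfiles.com/etext/NONFICTION/locke-essay-113.txt"),
                  ("mobydick.txt", "http://textfiles.com/etext/FICTION/mobydick.txt")]:
    if os.path.exists(name):  # skip files fetched on a previous run
        continue
    try:
        urllib.request.urlretrieve(url, name)
    except urllib.error.URLError as err:
        print(f"Could not download {name}: {err}")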

Import the nvCOMP Python module and check the version#

[3]:
from nvidia import nvcomp
print("nvcomp version:", nvcomp.__version__)
print("nvcomp cuda version:", nvcomp.__cuda_version__)
nvcomp version: 4.0.0
nvcomp cuda version: 12030

Zero-copy import of a host array#

[4]:
ascending = np.arange(0, 4096, dtype=np.int32)
nvarr_h = nvcomp.as_array(ascending)
[5]:
print(ascending.__array_interface__)
print(nvarr_h.__array_interface__)
print(nvarr_h.__cuda_array_interface__)
print(nvarr_h.buffer_size)
print(nvarr_h.buffer_kind)
print(nvarr_h.ndim)
print(nvarr_h.dtype)
print(nvarr_h.shape)
print(nvarr_h.strides)
print(nvarr_h.item_size)
print(nvarr_h.size)

{'data': (94335900832880, False), 'strides': None, 'descr': [('', '<i4')], 'typestr': '<i4', 'shape': (4096,), 'version': 3}
{'shape': (4096,), 'strides': None, 'typestr': '<i4', 'data': (94335900832880, False), 'version': 3}
{'shape': (4096,), 'strides': None, 'typestr': '<i4', 'data': (94335900832880, False), 'version': 3, 'stream': 1}
16384
ArrayBufferKind.STRIDED_HOST
1
int32
(4096,)
(4,)
4
4096
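
The identical data pointers printed for ascending and nvarr_h show that as_array wraps the existing NumPy buffer rather than copying it. A quick check of that claim (a minimal sketch, assuming the cells above have run):

[ ]:
# zero-copy: the nvCOMP host array reports the same data pointer as the NumPy array
same_ptr = (ascending.__array_interface__["data"][0]
            == nvarr_h.__array_interface__["data"][0])
print("Host import is zero-copy:", same_ptr)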

Zero-copy import of a device array#

[6]:
data_gpu = cp.array(ascending)
nvarr_d = nvcomp.as_array(data_gpu)
print(data_gpu.__cuda_array_interface__)
print(nvarr_d.__cuda_array_interface__)
print(nvarr_d.buffer_kind)
print(nvarr_d.ndim)
print(nvarr_d.dtype)
print(nvarr_d.shape)
print(nvarr_d.strides)
print(nvarr_d.item_size)
print(nvarr_d.size)
{'shape': (4096,), 'typestr': '<i4', 'descr': [('', '<i4')], 'stream': 1, 'version': 3, 'strides': None, 'data': (34472984576, False)}
{'shape': (4096,), 'strides': None, 'typestr': '<i4', 'data': (34472984576, False), 'version': 3, 'stream': 1}
ArrayBufferKind.STRIDED_DEVICE
1
int32
(4096,)
(4,)
4
4096

Convert a host array to a device array#

[7]:
nvarr_d_cnv = nvarr_h.cuda()
print(nvarr_d_cnv.__cuda_array_interface__)
{'shape': (4096,), 'strides': None, 'typestr': '<i4', 'data': (47244640256, False), 'version': 3, 'stream': 1}

Convert a device array to a host array#

[8]:
nvarr_h_cnv = nvarr_d.cpu()
print(nvarr_h_cnv.__array_interface__)
{'shape': (4096,), 'strides': None, 'typestr': '<i4', 'data': (12960415744, False), 'version': 3}
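
Because the converted host array exposes __array_interface__, NumPy can view it directly; a roundtrip check that the device transfer and the copy back to the host preserved the data (a minimal sketch, assuming the cells above have run):

[ ]:
# view the converted host array through NumPy and compare with the original input
roundtrip = np.asarray(nvarr_h_cnv)
print("Roundtrip matches original:", np.array_equal(roundtrip, ascending))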

Encode a single array#

Read the text file

[ ]:
with open('mobydick.txt', "rb") as f: text = f.read()

as_array supports the Python buffer protocol, so we can pass text to it directly

[ ]:
nvarr_txt_h = nvcomp.as_array(text)
print (nvarr_txt_h.__array_interface__)
{'shape': (1205404,), 'strides': None, 'typestr': '|u1', 'data': (94335916846944, False), 'version': 3}

Transfer to the device

[ ]:
nvarr_txt_d = nvarr_txt_h.cuda()
print(nvarr_txt_d.__cuda_array_interface__)
{'shape': (1205404,), 'strides': None, 'typestr': '|u1', 'data': (47244656640, False), 'version': 3, 'stream': 1}

Create the codec

[ ]:
lz4_codec = nvcomp.Codec(algorithm="LZ4")

Encode

[13]:
lz4_comp_arr = lz4_codec.encode(nvarr_txt_d)
[14]:
print(lz4_comp_arr.__cuda_array_interface__)
print(lz4_comp_arr.buffer_kind)
{'shape': (824829,), 'strides': None, 'typestr': '|u1', 'data': (47248921600, False), 'version': 3, 'stream': 94335914071776}
ArrayBufferKind.STRIDED_DEVICE

The array supports the Python buffer protocol, so we can pass it directly to the write function

[15]:
with open('mobydick.lz4', "wb") as f: f.write(lz4_comp_arr.cpu())
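
As a quick roundtrip check (a minimal sketch, assuming the cells above have run), the file just written can be read back and decoded; decode accepts the raw bytes because bytes objects support the buffer protocol:

[ ]:
with open('mobydick.lz4', "rb") as f:
    comp_bytes = f.read()
nv_roundtrip = lz4_codec.decode(comp_bytes)
print("File roundtrip matches original:", bytes(nv_roundtrip.cpu()) == bytes(nvarr_txt_h))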

Objects with a standard interface can be passed directly to the encode function

[16]:
lz4_comp_arr = lz4_codec.encode(text)

Decode a single array#

[17]:
nv_dec_array = lz4_codec.decode(lz4_comp_arr)
[18]:
print(nv_dec_array.__cuda_array_interface__)
print(nv_dec_array.buffer_kind)
{'shape': (1205404,), 'strides': None, 'typestr': '|i1', 'data': (47253028864, False), 'version': 3, 'stream': 94335914071776}
ArrayBufferKind.STRIDED_DEVICE

Compare the decoded array with the original and print the first 400 bytes of the decoded array

[19]:
print("Is decoded equal to original?", bytes(nv_dec_array.cpu()) ==  bytes(nvarr_txt_h))
print(bytes(nv_dec_array.cpu())[:400].decode())
Is decoded equal to original? True
Preliminary Matter.

This text of Melville's Moby-Dick is based on the Hendricks House edition.
It was prepared by Professor Eugene F. Irey at the University of Colorado.
Any subsequent copies of this data must include this notice
and any publications resulting from analysis of this data must
include reference to Professor Irey's work.

Etymology  (Supplied by a late consumptive usher to a gra

Encode and decode with the ANS codec, specifying the chunk size and checksum policy

[20]:
ans_codec = nvcomp.Codec(algorithm="ANS", chunk_size=20, checksum_policy=nvcomp.ChecksumPolicy.COMPUTE_AND_VERIFY)
ans_comp_arr = ans_codec.encode(nvarr_d)

Specify the decode output type

[21]:
ans_deco_arr_uint8 = ans_codec.decode(ans_comp_arr)
ans_deco_arr_uint32 = ans_codec.decode(ans_comp_arr, '<u4')

print(ans_deco_arr_uint8.dtype)
print(ans_deco_arr_uint32.dtype)
uint8
uint32
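
Since nvarr_d wraps the ascending int32 array, decoding with '<u4' should reproduce the original values. A verification sketch (assuming CuPy can view the decoded device array through __cuda_array_interface__):

[ ]:
# zero-copy view of the decoded device array, compared element-wise with the original
decoded_u32 = cp.asarray(ans_deco_arr_uint32)
print("ANS roundtrip matches original:", bool(cp.array_equal(decoded_u32, data_gpu)))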

Codec-specific options#

[22]:
gdeflate_ht_codec = nvcomp.Codec(algorithm="GDeflate", algorithm_type=0)
gdeflate_lt_codec = nvcomp.Codec(algorithm="GDeflate", algorithm_type=1)

[23]:
%%timeit
gdeflate_ht_comp_arr = gdeflate_ht_codec.encode(text[:4096])
300 µs ± 50.2 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
[24]:
%%timeit
gdeflate_lt_comp_arr = gdeflate_lt_codec.encode(text[:4096])
895 µs ± 24.4 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
[25]:
gdeflate_ht_comp_arr = gdeflate_ht_codec.encode(text[:4096])
gdeflate_lt_comp_arr = gdeflate_lt_codec.encode(text[:4096])
print("high-throughput, low compression ratio (default) - compressed size:", gdeflate_ht_comp_arr.size)
print("low-throughput, high compression ratio - compressed size:", gdeflate_lt_comp_arr.size)
high-throughput, low compression ratio (default) - compressed size: 2664
low-throughput, high compression ratio - compressed size: 2520
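
Both algorithm_type settings trade encode throughput against compression ratio but still produce decodable streams. A roundtrip sketch for the chunk compressed above (a minimal check, assuming the cells above have run):

[ ]:
for name, codec, arr in [("high-throughput", gdeflate_ht_codec, gdeflate_ht_comp_arr),
                         ("low-throughput", gdeflate_lt_codec, gdeflate_lt_comp_arr)]:
    dec = codec.decode(arr)
    print(name, "roundtrip ok:", bytes(dec.cpu()) == text[:4096])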

Encode a single array with multiple codecs#

[26]:
print("Uncompressed size is", nvarr_txt_d.buffer_size)
algos = ["LZ4", "Snappy", "GDeflate", "Deflate", "Bitcomp", "ANS", "Zstd", "Cascaded"]
encoded_files = []
for algorithm in algos:
    codec = nvcomp.Codec(algorithm=algorithm)
    com_arr = codec.encode(nvarr_txt_d)
    print("Compressed size for", algorithm, "is", com_arr.buffer_size, "({:.1%})".format(com_arr.buffer_size/nvarr_txt_d.buffer_size) )
    with open('mobydick.%s' % algorithm, "wb") as f: f.write(com_arr.cpu())
    encoded_files.append('mobydick.%s'% algorithm)
Uncompressed size is 1205404
Compressed size for LZ4 is 824829 (68.4%)
Compressed size for Snappy is 863151 (71.6%)
Compressed size for GDeflate is 622632 (51.7%)
Compressed size for Deflate is 619051 (51.4%)
Compressed size for Bitcomp is 986776 (81.9%)
Compressed size for ANS is 737804 (61.2%)
Compressed size for Zstd is 540745 (44.9%)
Compressed size for Cascaded is 1205948 (100.0%)

Decode single arrays in various formats#

[27]:
codec = nvcomp.Codec()
for file_name in encoded_files:
    print("Decoding", file_name,)
    with open(file_name, "rb") as f: comp_bytes = f.read()
    nv_dec_d = codec.decode(comp_bytes) # since it supports buffer protocol we can pass comp_bytes directly
    print ("is equal to original? -", bytes(nv_dec_d.cpu()) ==  bytes(nvarr_txt_h))
Decoding mobydick.LZ4
is equal to original? - True
Decoding mobydick.Snappy
is equal to original? - True
Decoding mobydick.GDeflate
is equal to original? - True
Decoding mobydick.Deflate
is equal to original? - True
Decoding mobydick.Bitcomp
is equal to original? - True
Decoding mobydick.ANS
is equal to original? - True
Decoding mobydick.Zstd
is equal to original? - True
Decoding mobydick.Cascaded
is equal to original? - True

Encode and decode with various bitstream kinds#

[28]:
print("Uncompressed size is", nvarr_txt_d.buffer_size)
algos = ["LZ4", "Snappy", "Bitcomp", "ANS", "Zstd",  "Cascaded"]
bitstreams = [
    nvcomp.BitstreamKind.NVCOMP_NATIVE,
    nvcomp.BitstreamKind.RAW,
    nvcomp.BitstreamKind.WITH_UNCOMPRESSED_SIZE
]

for algorithm in algos:
    for bitstream_kind in bitstreams:
        codec = nvcomp.Codec(algorithm=algorithm, bitstream_kind=bitstream_kind)
        comp_arr = codec.encode(nvarr_txt_d)
        comp_ratio = comp_arr.buffer_size/nvarr_txt_d.buffer_size
        print("Compressed size for", algorithm, "with bitstream", bitstream_kind, "is", comp_arr.buffer_size, "({:.1%})".format(comp_ratio))
        decomp_array = codec.decode(comp_arr)
        print ("is equal to original? -", bytes(decomp_array.cpu()) ==  bytes(nvarr_txt_d.cpu()))
Uncompressed size is 1205404
Compressed size for LZ4 with bitstream BitstreamKind.NVCOMP_NATIVE is 824829 (68.4%)
is equal to original? - True
Compressed size for LZ4 with bitstream BitstreamKind.RAW is 807075 (67.0%)
is equal to original? - True
Compressed size for LZ4 with bitstream BitstreamKind.WITH_UNCOMPRESSED_SIZE is 807079 (67.0%)
is equal to original? - True
Compressed size for Snappy with bitstream BitstreamKind.NVCOMP_NATIVE is 863143 (71.6%)
is equal to original? - True
Compressed size for Snappy with bitstream BitstreamKind.RAW is 854105 (70.9%)
is equal to original? - True
Compressed size for Snappy with bitstream BitstreamKind.WITH_UNCOMPRESSED_SIZE is 854113 (70.9%)
is equal to original? - True
Compressed size for Bitcomp with bitstream BitstreamKind.NVCOMP_NATIVE is 986776 (81.9%)
is equal to original? - True
Compressed size for Bitcomp with bitstream BitstreamKind.RAW is 985800 (81.8%)
is equal to original? - True
Compressed size for Bitcomp with bitstream BitstreamKind.WITH_UNCOMPRESSED_SIZE is 985808 (81.8%)
is equal to original? - True
Compressed size for ANS with bitstream BitstreamKind.NVCOMP_NATIVE is 737802 (61.2%)
is equal to original? - True
Compressed size for ANS with bitstream BitstreamKind.RAW is 680096 (56.4%)
is equal to original? - True
Compressed size for ANS with bitstream BitstreamKind.WITH_UNCOMPRESSED_SIZE is 680104 (56.4%)
is equal to original? - True
Compressed size for Zstd with bitstream BitstreamKind.NVCOMP_NATIVE is 540745 (44.9%)
is equal to original? - True
Compressed size for Zstd with bitstream BitstreamKind.RAW is 527380 (43.8%)
is equal to original? - True
Compressed size for Zstd with bitstream BitstreamKind.WITH_UNCOMPRESSED_SIZE is 527388 (43.8%)
is equal to original? - True
Compressed size for Cascaded with bitstream BitstreamKind.NVCOMP_NATIVE is 1205948 (100.0%)
is equal to original? - True
Compressed size for Cascaded with bitstream BitstreamKind.RAW is 1205412 (100.0%)
is equal to original? - True
Compressed size for Cascaded with bitstream BitstreamKind.WITH_UNCOMPRESSED_SIZE is 1205420 (100.0%)
is equal to original? - True

Batch encoding#

[29]:
nv_uncomp_arrays = []
for fn in ['mobydick.txt', 'locke-essay-113.txt']:
    with open(fn, "rb") as f: text = f.read()
    nv_uncomp_arrays.append( nvcomp.as_array(text).cuda())
[30]:
nv_comp_arrays = lz4_codec.encode(nv_uncomp_arrays)
[31]:
for i in range(len(nv_uncomp_arrays)):
    print(nv_uncomp_arrays[i].__cuda_array_interface__)
    print(nv_comp_arrays[i].__cuda_array_interface__)
{'shape': (1205404,), 'strides': None, 'typestr': '|u1', 'data': (47647160320, False), 'version': 3, 'stream': 1}
{'shape': (824829,), 'strides': None, 'typestr': '|u1', 'data': (47625135104, False), 'version': 3, 'stream': 94335914071776}
{'shape': (1605768,), 'strides': None, 'typestr': '|u1', 'data': (47648366080, False), 'version': 3, 'stream': 1}
{'shape': (978812,), 'strides': None, 'typestr': '|u1', 'data': (47247113728, False), 'version': 3, 'stream': 94335914071776}

Batch decoding#

[32]:
nv_dec_arrays = lz4_codec.decode(nv_comp_arrays)

Compare with the original data

[33]:
for i in range(len(nv_dec_arrays)):
    print("Is decoded equal to original?",  bytes(nv_uncomp_arrays[i].cpu()) ==  bytes(nv_dec_arrays[i].cpu()))
    print("\n", bytes(nv_dec_arrays[i].cpu())[:400].decode())
Is decoded equal to original? True

 Preliminary Matter.

This text of Melville's Moby-Dick is based on the Hendricks House edition.
It was prepared by Professor Eugene F. Irey at the University of Colorado.
Any subsequent copies of this data must include this notice
and any publications resulting from analysis of this data must
include reference to Professor Irey's work.

Etymology  (Supplied by a late consumptive usher to a gra
Is decoded equal to original? True

                                       1690

                    AN ESSAY CONCERNING HUMAN UNDERSTANDING

                                 by John Locke

                       TO THE RIGHT HONOURABLE

            LORD THOMAS, EARL OF PEMBROKE AND MONTGOMERY,

                      BARRON HERBERT OF CARDIFF,

      LORD ROSS, OF KENDAL, PAR, FITZHUGH, MARMION, ST. QUINTIN,

          AND SHURLAND;

Batch encode and decode in various formats#

[34]:
algos = ["LZ4", "Snappy", "Bitcomp", "Cascaded", "Zstd", "ANS"]
bitstreams = [
    nvcomp.BitstreamKind.NVCOMP_NATIVE,
    nvcomp.BitstreamKind.RAW,
    nvcomp.BitstreamKind.WITH_UNCOMPRESSED_SIZE
]
for algorithm in algos:
    for bitstream_kind in bitstreams:
        print("Algorithm:", algorithm, "BitstreamKind:", bitstream_kind)
        codec = nvcomp.Codec(algorithm=algorithm, bitstream_kind=bitstream_kind)
        nv_comp_arrays = codec.encode(nv_uncomp_arrays)
        nv_dec_arrays = codec.decode(nv_comp_arrays)
        for i in range(len(nv_dec_arrays)):
            print(" - File #", i)
            print("   -- Uncompressed size:", nv_uncomp_arrays[i].buffer_size)
            print("   -- Compressed size:", nv_comp_arrays[i].buffer_size, "({:.1%})".format(nv_comp_arrays[i].buffer_size/nv_uncomp_arrays[i].buffer_size) )
            print("   -- Is decoded equal to original?",  bytes(nv_uncomp_arrays[i].cpu()) == bytes(nv_dec_arrays[i].cpu()))

Algorithm: LZ4 BitstreamKind: BitstreamKind.NVCOMP_NATIVE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 824829 (68.4%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 978812 (61.0%)
   -- Is decoded equal to original? True
Algorithm: LZ4 BitstreamKind: BitstreamKind.RAW
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 807075 (67.0%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 964181 (60.0%)
   -- Is decoded equal to original? True
Algorithm: LZ4 BitstreamKind: BitstreamKind.WITH_UNCOMPRESSED_SIZE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 807079 (67.0%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 964185 (60.0%)
   -- Is decoded equal to original? True
Algorithm: Snappy BitstreamKind: BitstreamKind.NVCOMP_NATIVE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 863151 (71.6%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 960232 (59.8%)
   -- Is decoded equal to original? True
Algorithm: Snappy BitstreamKind: BitstreamKind.RAW
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 854105 (70.9%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 945120 (58.9%)
   -- Is decoded equal to original? True
Algorithm: Snappy BitstreamKind: BitstreamKind.WITH_UNCOMPRESSED_SIZE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 854113 (70.9%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 945128 (58.9%)
   -- Is decoded equal to original? True
Algorithm: Bitcomp BitstreamKind: BitstreamKind.NVCOMP_NATIVE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 986776 (81.9%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 1300340 (81.0%)
   -- Is decoded equal to original? True
Algorithm: Bitcomp BitstreamKind: BitstreamKind.RAW
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 985800 (81.8%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 1299060 (80.9%)
   -- Is decoded equal to original? True
Algorithm: Bitcomp BitstreamKind: BitstreamKind.WITH_UNCOMPRESSED_SIZE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 985808 (81.8%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 1299068 (80.9%)
   -- Is decoded equal to original? True
Algorithm: Cascaded BitstreamKind: BitstreamKind.NVCOMP_NATIVE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 1205948 (100.0%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 1606456 (100.0%)
   -- Is decoded equal to original? True
Algorithm: Cascaded BitstreamKind: BitstreamKind.RAW
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 1205412 (100.0%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 1605776 (100.0%)
   -- Is decoded equal to original? True
Algorithm: Cascaded BitstreamKind: BitstreamKind.WITH_UNCOMPRESSED_SIZE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 1205420 (100.0%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 1605784 (100.0%)
   -- Is decoded equal to original? True
Algorithm: Zstd BitstreamKind: BitstreamKind.NVCOMP_NATIVE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 540745 (44.9%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 613497 (38.2%)
   -- Is decoded equal to original? True
Algorithm: Zstd BitstreamKind: BitstreamKind.RAW
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 527380 (43.8%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 596140 (37.1%)
   -- Is decoded equal to original? True
Algorithm: Zstd BitstreamKind: BitstreamKind.WITH_UNCOMPRESSED_SIZE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 527388 (43.8%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 596148 (37.1%)
   -- Is decoded equal to original? True
Algorithm: ANS BitstreamKind: BitstreamKind.NVCOMP_NATIVE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 737804 (61.2%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 957184 (59.6%)
   -- Is decoded equal to original? True
Algorithm: ANS BitstreamKind: BitstreamKind.RAW
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 680094 (56.4%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 879728 (54.8%)
   -- Is decoded equal to original? True
Algorithm: ANS BitstreamKind: BitstreamKind.WITH_UNCOMPRESSED_SIZE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 680102 (56.4%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 879736 (54.8%)
   -- Is decoded equal to original? True

Checksum example#

Checksums can only be used with the nvcomp.BitstreamKind.NVCOMP_NATIVE bitstream kind. They are computed before the data is compressed and again after it is decompressed, to verify that the decompressed data is identical to the original.

Checksums guard against silent corruption, which can occur when the data is corrupted but decoding completes without error, or when there is a bug in the encode or decode implementation.

The following examples show how to use them. Create a codec that computes and verifies checksums

[35]:
codec = nvcomp.Codec(
    algorithm="GDeflate",
    bitstream_kind=nvcomp.BitstreamKind.NVCOMP_NATIVE,
    checksum_policy=nvcomp.ChecksumPolicy.COMPUTE_AND_VERIFY,
)
nv_comp_arrays = codec.encode(nv_uncomp_arrays)

Verify that there are no errors when the data is intact

[36]:
nv_dec_arrays = codec.decode(nv_comp_arrays)
for i in range(len(nv_dec_arrays)):
    print (f"Is array {i} equal to original? -", bytes(nv_dec_arrays[i].cpu()) ==  bytes(nv_uncomp_arrays[i].cpu()))
Is array 0 equal to original? - True
Is array 1 equal to original? - True

Introduce an artificial error into the data and decode

[37]:
array_with_error = 1

cupy_array = cp.asarray(nv_comp_arrays[array_with_error])
cupy_array[1000] = cupy_array[1000] ^ 176

nv_comp_arrays[array_with_error] = nvcomp.as_array(cupy_array)

nv_dec_arrays = codec.decode(nv_comp_arrays)

Check which arrays were affected during decoding (it should be only one). Checksums are verified only on first access to the data; any subsequent access skips the check.

[38]:
for i in range(len(nv_dec_arrays)):
    try:
        print (f"Is array {i} equal to original? -", bytes(nv_dec_arrays[i].cpu()) ==  bytes(nv_uncomp_arrays[i].cpu()))
    except RuntimeError as err:
        print(f"error with decoding array {i}: {err}")
        print(f"Rerunning: Is array {i} equal to original? -", bytes(nv_dec_arrays[i].cpu()) ==  bytes(nv_uncomp_arrays[i].cpu()))
Is array 0 equal to original? - True
error with decoding array 1: Checksum doesn't match.
Rerunning: Is array 1 equal to original? - False