2025-aliyunctf-easy-cuda-rev-writeup

比较有意思的题目

读取flag 文件

image.png

使用cuda 进行加密
image.png

内部没执行什么
image.png

查找交叉引用
image.png

有一些回调
image.png

发现注册了 fatbin
image.png

image.png

dump 出来

使用 cuobjdump 解析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
      
.version 8.0
.target sm_52
.address_size 64


.extern .func (.param .b32 func_retval0) vprintf
(
.param .b64 vprintf_param_0,
.param .b64 vprintf_param_1
)
;
.const .align 1 .b8 T[256] = {99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118, 202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192, 183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21, 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117, 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132, 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207, 208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168, 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210, 205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115, 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219, 224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121, 231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8, 186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138, 112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158, 225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223, 140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22};
.const .align 1 .b8 RT[256] = {82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251, 124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203, 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78, 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37, 114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146, 108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132, 144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6, 208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107, 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115, 150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110, 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27, 252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244, 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95, 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239, 160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97, 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125};
.global .align 1 .b8 $str_gift1[8] = {103, 105, 102, 116, 49, 58, 10, 0};
.global .align 1 .b8 $str$1[6] = {37, 48, 50, 120, 32, 0};
.global .align 1 .b8 $str$2_n_[2] = {10, 0};
.global .align 1 .b8 $str$3_gift2[8] = {103, 105, 102, 116, 50, 58, 10, 0};
.global .align 1 .b8 $str$4_gift3[8] = {103, 105, 102, 116, 51, 58, 10, 0};
.global .align 1 .b8 $str$5_gift4[8] = {103, 105, 102, 116, 52, 58, 10, 0};
.global .align 1 .b8 $str$6_gift5[8] = {103, 105, 102, 116, 53, 58, 10, 0};

.visible .entry _Z14encrypt_kernelPhh(
.param .u64 _Z14encrypt_kernelPhh_param_0,
.param .u8 _Z14encrypt_kernelPhh_param_1
)
{
.local .align 8 .b8 __local_depot0[8];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<41>;
.reg .b16 %rs<62>;
.reg .b32 %r<265>;
.reg .b64 %rd<103>;


mov.u64 %SPL, __local_depot0;
cvta.local.u64 %SP, %SPL;
ld.param.u8 %rs12, [_Z14encrypt_kernelPhh_param_1];
ld.param.u64 %rd19, [_Z14encrypt_kernelPhh_param_0];

/****
.... 省略一部分
***/

$L__BB0_52:
bar.sync 0;
cvt.u16.u32 %rs54, %r4;
ld.global.u8 %rs55, [%rd3];
xor.b16 %rs56, %rs55, %rs54;
st.global.u8 [%rd3], %rs56;
ret;

}


丢给gpt 解析

1

大概5个逻辑: 每执行一个会输出一个 gift。比较抽象的是 0xa00000 次循环, 必须使用 cuda 加速。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15



gift0 -> gift1: 执行 5 轮循环, 每轮循环 0xa00000 次

gift1 -> gift2: 与 key 异或, 并且相邻字节两两异或

gift2 -> gift3: 相邻字节两两互换

gift3 -> gift4: 也是互换: 首尾互换 (0 <-> end), 其余 1,2 = 2,1, 依次类推

gift4 -> gift5: 魔改 tea

gift5 -> output: 从 i = 0 开始, 每个字节与下标 i 异或, i++

大概这么多逻辑,但要注意加密是分块进行的,每一块 256 字节

编写pycuda执行刚才dump 出来的内容,可得打印的 gift内容

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pycuda.driver as cuda  
import pycuda.autoinit
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
from pycuda import gpuarray

print()

# Read the dumped fatbin image once at start-up.
# NOTE(review): r_data is not referenced by the rest of this script
# (cuda_encrypt re-reads the file itself); the read is kept so a missing
# "cuda.bin" still fails early, exactly as before.
with open("cuda.bin", mode="rb") as f:
    r_data = f.read()
#
# print(r_data)
# 1. 加载PTX代码

def cuda_encrypt(buf: np.ndarray, key: int) -> np.ndarray:
    """Run the dumped ``encrypt_kernel`` over ``buf`` on the GPU.

    Args:
        buf: Input data; it is converted to ``np.uint8`` before upload.
        key: Single-byte key passed to the kernel (0-255).

    Returns:
        The device buffer contents after the kernel ran, as ``np.uint8``.
    """
    # 1. Load the dumped fatbin image as a CUDA module.
    with open("cuda.bin", "rb") as f:
        fatbin = f.read()
    mod = cuda.module_from_buffer(fatbin)

    # 2. Look the kernel up by its C++-mangled symbol name.
    encrypt_kernel = mod.get_function("_Z14encrypt_kernelPhh")

    # 3. Upload the input bytes to the device.
    buf_gpu = gpuarray.to_gpu(buf.astype(np.uint8))

    # 4. Launch configuration: one 256-thread block per 256 input bytes,
    #    rounded up.
    # NOTE(review): the kernel receives only (pointer, key) and no length, so
    # buf.size is presumably expected to be a multiple of 256 - confirm
    # before passing other sizes.
    block_size = 256
    grid_x = (buf.size + block_size - 1) // block_size

    # 5. Run the kernel in place on the device buffer.
    encrypt_kernel(
        buf_gpu, np.uint8(key),
        block=(block_size, 1, 1), grid=(grid_x, 1, 1), shared=0
    )

    # 6. Copy the result back to the host.
    return buf_gpu.get()


if __name__ == "__main__":
    # Known plaintext: a recognisable prefix padded with '0' to one full
    # 256-byte block, repeated twice so two blocks are processed.
    plaintext = "flag_this_is_flag".ljust(256, "0") * 2
    data = np.array([ord(ch) for ch in plaintext], dtype=np.uint8)

    encrypted = cuda_encrypt(data, key=0xAc)
    print(encrypted)

    print(len(encrypted))
    print(len(list(encrypted)))


image.png

逻辑基本上清晰

编写解密:

tea 大概需要跑 5分钟,出结果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
// TestProject1.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//

#include <iostream>

#include <stdio.h>
#include <stdlib.h>



// Encrypts one 8-byte block (two 32-bit words at a1[0], a1[1]) in place.
//
// Runs 0xa00000 TEA-style Feistel rounds with a fixed 128-bit key. The running
// sum grows by 0x9e3779b9 before every round, so round r (1-based) uses
// r * 0x9e3779b9 modulo 2^32. All arithmetic is unsigned 32-bit and wraps.
//
// Returns the second output word (also stored in a1[1]).
__int64 __fastcall xxxxtea_en(unsigned int* a1)
{
    const unsigned int kDelta = 0x9e3779b9;
    const unsigned int kRounds = 0xa00000;
    const unsigned int key[4] = { 0xa341316c, 0xc8013ea4, 0x3c6ef372, 0x14292967 };

    unsigned int v0 = a1[0];
    unsigned int v1 = a1[1];
    unsigned int sum = 0;

    for (unsigned int round = 0; round < kRounds; ++round)
    {
        sum += kDelta;
        v0 += ((v1 << 4) + key[0]) ^ (sum + v1) ^ ((v1 >> 5) + key[1]);
        v1 += ((v0 << 4) + key[2]) ^ (v0 + sum) ^ ((v0 >> 5) + key[3]);
    }

    a1[0] = v0;
    a1[1] = v1;
    return v1;
}




// Decrypts one 8-byte block (two 32-bit words at a1[0], a1[1]) in place.
//
// Undoes 0xa00000 TEA-style Feistel rounds with the fixed 128-bit key below.
// The running sum starts at 0xa00000 * 0x9e3779b9 (modulo 2^32) and shrinks by
// 0x9e3779b9 after every round; each round reverses the v1 update first and
// the v0 update second. All arithmetic is unsigned 32-bit and wraps.
//
// Returns the second output word (also stored in a1[1]).
__int64 __fastcall xxxxtea_de(unsigned int* a1)
{
    const unsigned int kDelta = 0x9e3779b9;
    const unsigned int kRounds = 0xa00000;
    const unsigned int key[4] = { 0xa341316c, 0xc8013ea4, 0x3c6ef372, 0x14292967 };

    unsigned int v0 = a1[0];
    unsigned int v1 = a1[1];
    // Sum value used by the last encryption round; wraps modulo 2^32.
    unsigned int sum = kDelta * kRounds;

    for (unsigned int round = 0; round < kRounds; ++round)
    {
        v1 -= ((v0 << 4) + key[2]) ^ (v0 + sum) ^ ((v0 >> 5) + key[3]);
        v0 -= ((v1 << 4) + key[0]) ^ (sum + v1) ^ ((v1 >> 5) + key[1]);
        sum -= kDelta;
    }

    a1[0] = v0;
    a1[1] = v1;
    return v1;
}



// Reads the file "flag_enc", undoes the last two encryption stages and prints
// every resulting byte to stdout as lowercase hex followed by a comma ("%x,").
//
// Stage order: first XOR each byte with the low 8 bits of its file offset,
// then decrypt each complete 8-byte group with xxxxtea_de. Trailing bytes that
// do not fill a group only get the XOR step.
//
// Returns 0 on success, 1 if the file cannot be opened, sized, buffered or read.
int main()
{
    FILE* file = fopen(R"(flag_enc)", "rb");
    if (file == NULL)
    {
        perror("flag_enc");
        return 1;
    }

    // Determine the file size, then rewind so the same handle can be read.
    if (fseek(file, 0, SEEK_END) != 0)
    {
        perror("fseek");
        fclose(file);
        return 1;
    }
    long fileSize = ftell(file);
    if (fileSize < 0 || fseek(file, 0, SEEK_SET) != 0)
    {
        perror("flag_enc");
        fclose(file);
        return 1;
    }

    // Allocate at least one byte so an empty file does not look like a failure.
    unsigned char* buf = (unsigned char*)malloc(fileSize > 0 ? (size_t)fileSize : 1);
    if (buf == NULL)
    {
        fprintf(stderr, "out of memory\n");
        fclose(file);
        return 1;
    }

    size_t got = fread(buf, 1, (size_t)fileSize, file);
    fclose(file);
    if (got != (size_t)fileSize)
    {
        fprintf(stderr, "short read on flag_enc\n");
        free(buf);
        return 1;
    }

    // Undo the final stage: byte i was XORed with (i & 0xff).
    for (long i = 0; i < fileSize; i++)
    {
        buf[i] = (unsigned char)((buf[i] ^ i) & 0xff);
    }

    // Undo the block cipher stage, 8 bytes at a time.
    for (long i = 0; i < fileSize / 8; i++)
    {
        xxxxtea_de((unsigned int*)&buf[i * 8]);
    }

    for (long i = 0; i < fileSize; i++)
    {
        printf("%x,", buf[i]);
    }

    free(buf);
    return 0;
}


得到hex ,编写python脚本还原到gift1:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
  
T = [99 ,124 ,119 ,123 ,242 ,107 ,111 ,197 ,48 ,1 ,103 ,43 ,254 ,215 ,171 ,118 ,202 ,130 ,201 ,125 ,250 ,89 ,71 ,240
,173 ,212 ,162 ,175 ,156 ,164 ,114 ,192 ,183 ,253 ,147 ,38 ,54 ,63 ,247 ,204 ,52 ,165 ,229 ,241 ,113 ,216 ,49 ,21
,4 ,199 ,35 ,195 ,24 ,150 ,5 ,154 ,7 ,18 ,128 ,226 ,235 ,39 ,178 ,117 ,9 ,131 ,44 ,26 ,27 ,110 ,90 ,160 ,82 ,59
,214 ,179 ,41 ,227 ,47 ,132 ,83 ,209 ,0 ,237 ,32 ,252 ,177 ,91 ,106 ,203 ,190 ,57 ,74 ,76 ,88 ,207 ,208 ,239 ,170
,251 ,67 ,77 ,51 ,133 ,69 ,249 ,2 ,127 ,80 ,60 ,159 ,168 ,81 ,163 ,64 ,143 ,146 ,157 ,56 ,245 ,188 ,182 ,218 ,33
,16 ,255 ,243 ,210 ,205 ,12 ,19 ,236 ,95 ,151 ,68 ,23 ,196 ,167 ,126 ,61 ,100 ,93 ,25 ,115 ,96 ,129 ,79 ,220 ,34
,42 ,144 ,136 ,70 ,238 ,184 ,20 ,222 ,94 ,11 ,219 ,224 ,50 ,58 ,10 ,73 ,6 ,36 ,92 ,194 ,211 ,172 ,98 ,145 ,149 ,228
,121 ,231 ,200 ,55 ,109 ,141 ,213 ,78 ,169 ,108 ,86 ,244 ,234 ,101 ,122 ,174 ,8 ,186 ,120 ,37 ,46 ,28 ,166 ,180
,198 ,232 ,221 ,116 ,31 ,75 ,189 ,139 ,138 ,112 ,62 ,181 ,102 ,72 ,3 ,246 ,14 ,97 ,53 ,87 ,185 ,134 ,193 ,29 ,158
,225 ,248 ,152 ,17 ,105 ,217 ,142 ,148 ,155 ,30 ,135 ,233 ,206 ,85 ,40 ,223 ,140 ,161 ,137 ,13 ,191 ,230 ,66 ,104
,65 ,153 ,45 ,15 ,176 ,84 ,187 ,22]
RT = [82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251, 124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203, 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78, 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37, 114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146, 108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132, 144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6, 208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107, 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115, 150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110, 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27, 252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244, 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95, 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239, 160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97, 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125]



key = 0xac
tid = 1  # not referenced by the rest of this script
vals = [ord(i) for i in "flag_"]  # not referenced by the rest of this script

# The last stage (gift1 -> gift0) is far too slow in pure Python, so it is
# disabled here; flip to True to run it on the CPU anyway.
UNDO_GIFT0_ON_CPU = False

# "1.txt" is expected to hold comma-separated hex bytes without zero padding
# (the shape produced by printing each byte with "%x,").
with open("1.txt", mode="r", encoding="utf-8") as f:
    content = f.read()

# Zero-pad one-digit values and ignore empty tokens (trailing comma, stray
# whitespace or newline) so bytes.fromhex accepts the result.
co1 = [tok.strip().zfill(2) for tok in content.split(',') if tok.strip()]
content = ' '.join(co1)

a_gift4 = list(bytes.fromhex(content))

s_gift1 = []
l = []  # stays empty when the input holds no complete 256-byte block

# Every 256-byte block is processed independently; trailing bytes that do not
# fill a block are ignored.
for jj in range(len(a_gift4) // 256):
    gift4 = a_gift4[jj * 256:(jj + 1) * 256]

    # gift4 -> gift3: swap the pairs (1,2), (3,4), ... and the first/last byte.
    for i in range(1, len(gift4) - 1, 2):
        gift4[i], gift4[i + 1] = gift4[i + 1], gift4[i]
    gift4[-1], gift4[0] = gift4[0], gift4[-1]
    gift3 = gift4

    # gift3 -> gift2: swap the pairs (0,1), (2,3), ...
    for i in range(len(gift3) // 2):
        gift3[i * 2], gift3[i * 2 + 1] = gift3[i * 2 + 1], gift3[i * 2]
    gift2 = gift3

    # gift2 -> gift1: walk from the last byte to the first, XORing each byte
    # with the key and the previously produced byte (seeded with byte 0).
    cur_c = gift2[0]
    for i in range(len(gift2) - 1, -1, -1):
        gift2[i] = gift2[i] ^ cur_c ^ key
        cur_c = gift2[i]

    s_gift1 += gift2

    gift1 = gift2
    l = gift1

    if UNDO_GIFT0_ON_CPU:
        for j in range(len(l)):
            state = l[j]
            for _ in range(5):
                for i in range(0xa00000 - 1, -1, -1):
                    state ^= i & 0xFF
                    # Swap the two nibbles, then apply the inverse table.
                    state = ((state << 4) | (state >> 4)) & 0xFF
                    state = RT[state]
            state = ((state >> 4) | (state << 4)) & 0xFF
            res = state ^ ((j * 73 + key) & 0xFF)
            l[j] = res
            print(hex(res))

print(l)
print(s_gift1)

最后调用cuda计算 最终的内容:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
  



RT = [82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251, 124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203, 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78, 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37, 114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146, 108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132, 144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6, 208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107, 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115, 150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110, 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27, 252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244, 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95, 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239, 160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97, 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125]



import pycuda.autoinit
import pycuda.driver as cuda
import numpy as np
from pycuda.compiler import SourceModule

# 假设RT表已定义,这里用示例数据填充
# Convert the inverse table to a uint8 array so it can be copied to the device.
RT = np.array(RT, dtype=np.uint8)
key = 0xAc

# a14.data1 holds the byte values to process (project-local module).
import a14

a_gift4 = a14.data1

# Compile the kernel once; it does not change between 256-byte blocks.
mod = SourceModule("""
__constant__ unsigned char RT[256]; __global__ void compute_kernel(unsigned char* l, int len_l, int key) {
int j = blockIdx.x * blockDim.x + threadIdx.x; if (j >= len_l) return; unsigned char state = l[j];
for(int iter=0; iter<5; iter++){ for(int i=0xA00000-1; i>=0; i--){ // 关键修改点
int temp = (int)state ^ (i & 0xFF); // 提升为32位运算
temp = ((temp << 4) | (temp >> 4)) & 0xFF; // 显式截断
temp = RT[temp & 0xFF]; // 双重索引保护
state = (unsigned char)temp; } } state = ((state << 4) | (state >> 4)) & 0xFF;
unsigned char res = state ^ ((j * 73 + key) & 0xFF); l[j] = res & 0xff; }""")

# Copy the RT table into the kernel's __constant__ array.
rt_ptr = mod.get_global("RT")[0]
cuda.memcpy_htod(rt_ptr, RT)

compute_func = mod.get_function("compute_kernel")

block_size = 256
all_list = []

# Each 256-byte block is uploaded on its own, so the kernel's index j restarts
# at 0 for every block.
for jj in range(len(a_gift4) // 256):
    gift4 = a_gift4[jj * 256:(jj + 1) * 256]

    l = np.array(gift4, dtype=np.uint8)
    print(l)
    l_gpu = cuda.mem_alloc(l.nbytes)
    cuda.memcpy_htod(l_gpu, l)

    grid_size = (len(l) + block_size - 1) // block_size

    compute_func(l_gpu,
                 np.int32(len(l)),
                 np.int32(key),
                 block=(block_size, 1, 1),
                 grid=(grid_size, 1))

    # Copy the transformed block back into l and append it to the output.
    cuda.memcpy_dtoh(l, l_gpu)
    all_list += list(l)

with open("11.png", mode="wb") as f:
    f.write(bytes(all_list))

最后发现,结果是个图片,打开即为flag.


2025-aliyunctf-easy-cuda-rev-writeup
https://pwner.top/2025/02/27/2025aliyunctf-easy-cuda-rev/
作者
m1n9yu3
发布于
2025年2月27日
许可协议