Seregon/ShadPKG

A tool for deriving PKG packet encryption keys for ps4 written in c++

C++/47.3 KB/No license
core/decompiler/analysis/DominatorAnalysis.cpp
ShadPKG / core / decompiler / analysis / DominatorAnalysis.cpp
1#include "DominatorAnalysis.h"
2#include <algorithm>
3#include <limits>
4#include <queue>
5#include <stack>
6 
7namespace ShadPKG::Decompiler::Analysis {
8 
9// Static empty set for returning references
10const std::set<uint64_t> DominatorAnalysis::emptySet_;
11 
12// ╔═══════════════════════════════════════════════════════════════════════════╗
13// ║ MAIN ANALYSIS ENTRY ║
14// ╚═══════════════════════════════════════════════════════════════════════════╝
15 
16void DominatorAnalysis::analyze(const std::shared_ptr<IR::Function> &func) {
17 if (!func || func->basicBlocks.empty())
18 return;
19 
20 // ─────────────────────────────────────────────────────────────────────────
21 // Step 1: Build adjacency lists from basic blocks
22 // ─────────────────────────────────────────────────────────────────────────
23 
24 successors_.clear();
25 predecessors_.clear();
26 
27 for (const auto &bb : func->basicBlocks) {
28 uint64_t id = bb->id;
29 successors_[id] = bb->successors;
30 
31 for (uint64_t succ : bb->successors) {
32 predecessors_[succ].push_back(id);
33 }
34 }
35 
36 entryBlock_ = func->basicBlocks[0]->id;
37 
38 // ─────────────────────────────────────────────────────────────────────────
39 // Step 2: Compute reverse post-order
40 // ─────────────────────────────────────────────────────────────────────────
41 
42 computeReversePostOrder(func);
43 
44 // ─────────────────────────────────────────────────────────────────────────
45 // Step 3: Compute dominators (Cooper-Harvey-Kennedy)
46 // ─────────────────────────────────────────────────────────────────────────
47 
48 computeDominators();
49 
50 // ─────────────────────────────────────────────────────────────────────────
51 // Step 4: Compute post-dominators (reverse CFG)
52 // ─────────────────────────────────────────────────────────────────────────
53 
54 computePostDominators();
55 
56 // ─────────────────────────────────────────────────────────────────────────
57 // Step 5: Detect back-edges and build loops
58 // ─────────────────────────────────────────────────────────────────────────
59 
60 detectBackEdges();
61 buildLoops();
62}
63 
64// ╔═══════════════════════════════════════════════════════════════════════════╗
65// ║ REVERSE POST-ORDER ║
66// ║ ║
67// ║ DFS traversal, number nodes in post-order, then reverse. ║
68// ║ This gives us an order where dominators come before dominated nodes. ║
69// ║ ║
70// ║ Entry ║
71// ║ │ DFS: Entry→A→B→C (post: C=1,B=2,A=3,Entry=4) ║
72// ║ ▼ RPO: Entry,A,B,C (reversed post-order) ║
73// ║ A ║
74// ║ │ ║
75// ║ ▼ ║
76// ║ B ║
77// ║ │ ║
78// ║ ▼ ║
79// ║ C ║
80// ╚═══════════════════════════════════════════════════════════════════════════╝
81 
82void DominatorAnalysis::computeReversePostOrder(
83 const std::shared_ptr<IR::Function> &func) {
84 rpo_.clear();
85 rpoNumber_.clear();
86 
87 std::set<uint64_t> visited;
88 std::vector<uint64_t> postOrder;
89 
90 // Iterative DFS with explicit stack
91 std::stack<std::pair<uint64_t, int>> stack;
92 stack.push({entryBlock_, 0});
93 
94 while (!stack.empty()) {
95 auto &[blockId, succIdx] = stack.top();
96 
97 if (visited.find(blockId) == visited.end() && succIdx == 0) {
98 visited.insert(blockId);
99 }
100 
101 auto &succs = successors_[blockId];
102 
103 // Find next unvisited successor
104 bool foundNext = false;
105 while (succIdx < static_cast<int>(succs.size())) {
106 uint64_t succ = succs[succIdx];
107 succIdx++;
108 if (visited.find(succ) == visited.end()) {
109 stack.push({succ, 0});
110 foundNext = true;
111 break;
112 }
113 }
114 
115 if (!foundNext) {
116 // All successors visited, add to post-order
117 postOrder.push_back(blockId);
118 stack.pop();
119 }
120 }
121 
122 // Reverse to get RPO
123 rpo_.assign(postOrder.rbegin(), postOrder.rend());
124 
125 // Assign RPO numbers
126 for (size_t i = 0; i < rpo_.size(); ++i) {
127 rpoNumber_[rpo_[i]] = static_cast<int>(i);
128 }
129}
130 
131// ╔═══════════════════════════════════════════════════════════════════════════╗
132// ║ COOPER-HARVEY-KENNEDY ALGORITHM ║
133// ║ ║
134// ║ Iterative dominator computation. Efficient for reducible CFGs. ║
135// ║ ║
136// ║ For each node n (in RPO, except entry): ║
137// ║ new_idom = first processed predecessor ║
138// ║ for each other predecessor p: ║
139// ║ if idom[p] already computed: ║
140// ║ new_idom = intersect(new_idom, p) ║
141// ║ idom[n] = new_idom ║
142// ║ ║
143// ║ Repeat until no changes. ║
144// ╚═══════════════════════════════════════════════════════════════════════════╝
145 
146void DominatorAnalysis::computeDominators() {
147 idom_.clear();
148 doms_.clear();
149 
150 // Entry dominates itself
151 idom_[entryBlock_] = entryBlock_;
152 
153 bool changed = true;
154 while (changed) {
155 changed = false;
156 
157 // Process all blocks in RPO (skip entry)
158 for (size_t i = 1; i < rpo_.size(); ++i) {
159 uint64_t b = rpo_[i];
160 auto &preds = predecessors_[b];
161 
162 if (preds.empty())
163 continue;
164 
165 // Find first predecessor with computed idom
166 uint64_t newIdom = 0;
167 bool foundFirst = false;
168 
169 for (uint64_t p : preds) {
170 if (idom_.find(p) != idom_.end()) {
171 newIdom = p;
172 foundFirst = true;
173 break;
174 }
175 }
176 
177 if (!foundFirst)
178 continue;
179 
180 // Intersect with other processed predecessors
181 for (uint64_t p : preds) {
182 if (p == newIdom)
183 continue;
184 if (idom_.find(p) != idom_.end()) {
185 newIdom = intersect(p, newIdom);
186 }
187 }
188 
189 // Update if changed
190 if (idom_.find(b) == idom_.end() || idom_[b] != newIdom) {
191 idom_[b] = newIdom;
192 changed = true;
193 }
194 }
195 }
196 
197 // Build full dominator sets from idom tree
198 for (const auto &[block, _] : idom_) {
199 std::set<uint64_t> &domSet = doms_[block];
200 uint64_t current = block;
201 while (true) {
202 domSet.insert(current);
203 if (current == entryBlock_)
204 break;
205 current = idom_[current];
206 }
207 }
208}
209 
210// ─────────────────────────────────────────────────────────────────────────────
211// Intersect: Find common dominator by walking up the dominator tree
212// ─────────────────────────────────────────────────────────────────────────────
213 
214uint64_t DominatorAnalysis::intersect(uint64_t b1, uint64_t b2) const {
215 auto getNumber = [this](uint64_t b) -> int {
216 auto it = rpoNumber_.find(b);
217 return it != rpoNumber_.end() ? it->second
218 : std::numeric_limits<int>::max();
219 };
220 
221 uint64_t finger1 = b1;
222 uint64_t finger2 = b2;
223 
224 while (finger1 != finger2) {
225 while (getNumber(finger1) > getNumber(finger2)) {
226 auto it = idom_.find(finger1);
227 if (it == idom_.end() || it->second == finger1)
228 return entryBlock_;
229 finger1 = it->second;
230 }
231 while (getNumber(finger2) > getNumber(finger1)) {
232 auto it = idom_.find(finger2);
233 if (it == idom_.end() || it->second == finger2)
234 return entryBlock_;
235 finger2 = it->second;
236 }
237 }
238 
239 return finger1;
240}
241 
242// ╔═══════════════════════════════════════════════════════════════════════════╗
243// ║ POST-DOMINATORS ║
244// ║ ║
245// ║ Same algorithm but on reversed CFG, starting from exit nodes. ║
246// ║ Used to detect if-then-else merge points. ║
247// ╚═══════════════════════════════════════════════════════════════════════════╝
248 
249void DominatorAnalysis::computePostDominators() {
250 ipdom_.clear();
251 pdoms_.clear();
252 
253 // Find exit blocks (no successors or RET)
254 std::vector<uint64_t> exitBlocks;
255 for (const auto &[blockId, succs] : successors_) {
256 if (succs.empty()) {
257 exitBlocks.push_back(blockId);
258 }
259 }
260 
261 if (exitBlocks.empty())
262 return;
263 
264 // Create virtual exit node
265 exitBlock_ = 0xFFFFFFFFFFFFFFFF;
266 
267 // Build reverse CFG
268 std::map<uint64_t, std::vector<uint64_t>>
269 revSucc; // Reversed: succs become preds
270 std::map<uint64_t, std::vector<uint64_t>> revPred;
271 
272 for (const auto &[blockId, succs] : successors_) {
273 for (uint64_t s : succs) {
274 revSucc[s].push_back(blockId);
275 revPred[blockId].push_back(s);
276 }
277 }
278 
279 // Connect exit blocks to virtual exit
280 for (uint64_t exit : exitBlocks) {
281 revSucc[exitBlock_].push_back(exit);
282 revPred[exit].push_back(exitBlock_);
283 }
284 
285 // Compute RPO on reverse CFG
286 std::vector<uint64_t> revRpo;
287 std::map<uint64_t, int> revRpoNumber;
288 std::set<uint64_t> visited;
289 std::vector<uint64_t> postOrder;
290 
291 std::stack<std::pair<uint64_t, int>> stack;
292 stack.push({exitBlock_, 0});
293 
294 while (!stack.empty()) {
295 auto &[blockId, succIdx] = stack.top();
296 
297 if (visited.find(blockId) == visited.end() && succIdx == 0) {
298 visited.insert(blockId);
299 }
300 
301 auto &succs = revSucc[blockId];
302 
303 bool foundNext = false;
304 while (succIdx < static_cast<int>(succs.size())) {
305 uint64_t succ = succs[succIdx];
306 succIdx++;
307 if (visited.find(succ) == visited.end()) {
308 stack.push({succ, 0});
309 foundNext = true;
310 break;
311 }
312 }
313 
314 if (!foundNext) {
315 postOrder.push_back(blockId);
316 stack.pop();
317 }
318 }
319 
320 revRpo.assign(postOrder.rbegin(), postOrder.rend());
321 for (size_t i = 0; i < revRpo.size(); ++i) {
322 revRpoNumber[revRpo[i]] = static_cast<int>(i);
323 }
324 
325 // Compute post-dominators using same algorithm
326 ipdom_[exitBlock_] = exitBlock_;
327 
328 bool changed = true;
329 while (changed) {
330 changed = false;
331 
332 for (size_t i = 1; i < revRpo.size(); ++i) {
333 uint64_t b = revRpo[i];
334 auto &preds = revPred[b];
335 
336 if (preds.empty())
337 continue;
338 
339 uint64_t newIpdom = 0;
340 bool foundFirst = false;
341 
342 for (uint64_t p : preds) {
343 if (ipdom_.find(p) != ipdom_.end()) {
344 newIpdom = p;
345 foundFirst = true;
346 break;
347 }
348 }
349 
350 if (!foundFirst)
351 continue;
352 
353 for (uint64_t p : preds) {
354 if (p == newIpdom)
355 continue;
356 if (ipdom_.find(p) != ipdom_.end()) {
357 // Intersect on reverse RPO
358 uint64_t f1 = p, f2 = newIpdom;
359 while (f1 != f2) {
360 while (revRpoNumber[f1] > revRpoNumber[f2]) {
361 if (ipdom_.find(f1) == ipdom_.end())
362 break;
363 f1 = ipdom_[f1];
364 }
365 while (revRpoNumber[f2] > revRpoNumber[f1]) {
366 if (ipdom_.find(f2) == ipdom_.end())
367 break;
368 f2 = ipdom_[f2];
369 }
370 }
371 newIpdom = f1;
372 }
373 }
374 
375 if (ipdom_.find(b) == ipdom_.end() || ipdom_[b] != newIpdom) {
376 ipdom_[b] = newIpdom;
377 changed = true;
378 }
379 }
380 }
381}
382 
383// ╔═══════════════════════════════════════════════════════════════════════════╗
384// ║ BACK-EDGE DETECTION ║
385// ║ ║
386// ║ A back-edge is an edge A → B where B dominates A. ║
387// ║ Each back-edge indicates a natural loop with B as the header. ║
388// ║ ║
389// ║ ┌──────────────────────┐ ║
390// ║ │ │ ║
391// ║ ▼ │ Back-edge (A→B) ║
392// ║ ┌───────┐ │ ║
393// ║ │ B │ ◄── Loop Header │ ║
394// ║ └───────┘ │ ║
395// ║ │ │ ║
396// ║ ▼ │ ║
397// ║ ┌───────┐ │ ║
398// ║ │ A │ ─────────────────┘ ║
399// ║ └───────┘ ║
400// ╚═══════════════════════════════════════════════════════════════════════════╝
401 
402void DominatorAnalysis::detectBackEdges() {
403 backEdges_.clear();
404 
405 for (const auto &[blockId, succs] : successors_) {
406 for (uint64_t succ : succs) {
407 // Check if succ dominates blockId → back-edge
408 if (dominates(succ, blockId)) {
409 backEdges_.push_back({blockId, succ});
410 }
411 }
412 }
413}
414 
415// ╔═══════════════════════════════════════════════════════════════════════════╗
416// ║ LOOP CONSTRUCTION ║
417// ║ ║
418// ║ For each back-edge (latch → header): ║
419// ║ 1. Header is the loop header ║
420// ║ 2. Collect all blocks between header and latch (reverse DFS) ║
421// ║ 3. Find exit blocks (edges leaving the loop) ║
422// ╚═══════════════════════════════════════════════════════════════════════════╝
423 
424void DominatorAnalysis::buildLoops() {
425 loops_.clear();
426 blockToLoop_.clear();
427 
428 for (const auto &[latch, header] : backEdges_) {
429 LoopInfo loop;
430 loop.headerBlock = header;
431 loop.latchBlock = latch;
432 
433 // Collect loop body
434 collectLoopBody(header, latch, loop.bodyBlocks);
435 
436 // Find exit blocks
437 for (uint64_t block : loop.bodyBlocks) {
438 for (uint64_t succ : successors_[block]) {
439 if (loop.bodyBlocks.find(succ) == loop.bodyBlocks.end()) {
440 loop.exitBlocks.push_back(succ);
441 }
442 }
443 }
444 
445 // Determine if do-while: if header has only one predecessor from outside
446 // loop and condition is at latch, it's do-while
447 auto &headerPreds = predecessors_[header];
448 int externalPreds = 0;
449 for (uint64_t p : headerPreds) {
450 if (loop.bodyBlocks.find(p) == loop.bodyBlocks.end()) {
451 externalPreds++;
452 }
453 }
454 
455 // Simple heuristic: if latch has the conditional branch, it's likely
456 // do-while
457 if (successors_[latch].size() == 2 && externalPreds == 1) {
458 loop.isDoWhile = true;
459 }
460 
461 // Map blocks to loop
462 size_t loopIdx = loops_.size();
463 for (uint64_t block : loop.bodyBlocks) {
464 blockToLoop_[block] = loopIdx;
465 }
466 
467 loops_.push_back(std::move(loop));
468 }
469}
470 
471// ─────────────────────────────────────────────────────────────────────────────
472// Collect loop body via reverse DFS from latch to header
473// ─────────────────────────────────────────────────────────────────────────────
474 
475void DominatorAnalysis::collectLoopBody(uint64_t header, uint64_t latch,
476 std::set<uint64_t> &body) {
477 body.insert(header);
478 
479 if (header == latch)
480 return;
481 
482 std::stack<uint64_t> worklist;
483 worklist.push(latch);
484 body.insert(latch);
485 
486 while (!worklist.empty()) {
487 uint64_t block = worklist.top();
488 worklist.pop();
489 
490 for (uint64_t pred : predecessors_[block]) {
491 if (body.find(pred) == body.end()) {
492 body.insert(pred);
493 worklist.push(pred);
494 }
495 }
496 }
497}
498 
499// ╔═══════════════════════════════════════════════════════════════════════════╗
500// ║ QUERY METHODS ║
501// ╚═══════════════════════════════════════════════════════════════════════════╝
502 
503uint64_t DominatorAnalysis::getImmediateDominator(uint64_t blockId) const {
504 auto it = idom_.find(blockId);
505 return (it != idom_.end()) ? it->second : 0;
506}
507 
508bool DominatorAnalysis::dominates(uint64_t dominator,
509 uint64_t dominated) const {
510 auto it = doms_.find(dominated);
511 if (it == doms_.end())
512 return false;
513 return it->second.find(dominator) != it->second.end();
514}
515 
516bool DominatorAnalysis::strictlyDominates(uint64_t dominator,
517 uint64_t dominated) const {
518 return (dominator != dominated) && dominates(dominator, dominated);
519}
520 
521const std::set<uint64_t> &
522DominatorAnalysis::getDominators(uint64_t blockId) const {
523 auto it = doms_.find(blockId);
524 return (it != doms_.end()) ? it->second : emptySet_;
525}
526 
527uint64_t DominatorAnalysis::getImmediatePostDominator(uint64_t blockId) const {
528 auto it = ipdom_.find(blockId);
529 return (it != ipdom_.end()) ? it->second : 0;
530}
531 
532bool DominatorAnalysis::postDominates(uint64_t postDom,
533 uint64_t dominated) const {
534 auto it = pdoms_.find(dominated);
535 if (it == pdoms_.end())
536 return false;
537 return it->second.find(postDom) != it->second.end();
538}
539 
540bool DominatorAnalysis::isLoopHeader(uint64_t blockId) const {
541 for (const auto &loop : loops_) {
542 if (loop.headerBlock == blockId)
543 return true;
544 }
545 return false;
546}
547 
548const LoopInfo *DominatorAnalysis::getLoopFor(uint64_t blockId) const {
549 auto it = blockToLoop_.find(blockId);
550 if (it != blockToLoop_.end() && it->second < loops_.size()) {
551 return &loops_[it->second];
552 }
553 return nullptr;
554}
555 
556} // namespace ShadPKG::Decompiler::Analysis
557