Seregon/ShadPKG

A tool for deriving PKG packet encryption keys for ps4 written in c++

C++/47.3 KB/No license
core/decompiler/analysis/SymbolAnalysis.cpp
ShadPKG / core / decompiler / analysis / SymbolAnalysis.cpp
1#include "SymbolAnalysis.h"
2#include "SymbolDatabase.h"
3#include <cstring>
4#include <iostream>
5 
6namespace ShadPKG::Decompiler::Analysis {
7 
8SymbolAnalysis::SymbolAnalysis(const std::vector<uint8_t> &rawData,
9 uint64_t baseAddress,
10 std::shared_ptr<SymbolDatabase> db)
11 : data_(rawData), baseAddress_(baseAddress), db_(db) {}
12 
13void SymbolAnalysis::analyze() {
14 if (data_.size() < sizeof(Elf64_Ehdr))
15 return;
16 
17 // Check magic
18 if (data_[0] != 0x7F || data_[1] != 'E' || data_[2] != 'L' || data_[3] != 'F')
19 return;
20 
21 parseELF();
22}
23 
24std::optional<std::string>
25SymbolAnalysis::getFunctionName(uint64_t address) const {
26 if (db_) {
27 auto name = db_->getSymbolName(address);
28 if (!name.empty() && name.find("loc_") == std::string::npos) {
29 return name;
30 }
31 // If db returned a default loc_ name, check if we want to return nullopt
32 // to indicate "no symbol found" vs "default name".
33 // For call targets, returning a name is fine.
34 // But preserving specific nullopt semantic for "unknown" might be safer?
35 // Let's rely on DB's getSymbol returning optional.
36 auto sym = db_->getSymbol(address);
37 if (sym)
38 return sym->name;
39 }
40 return std::nullopt;
41}
42 
43std::optional<std::string>
44SymbolAnalysis::getStringLiteral(uint64_t address) const {
45 // Check if address falls into .rodata
46 for (const auto &section : rodataSections_) {
47 if (address >= section.va && address < section.va + section.size) {
48 uint64_t offsetInSec = address - section.va;
49 uint64_t fileOffset = section.fileOffset + offsetInSec;
50 
51 if (fileOffset >= data_.size())
52 return std::nullopt;
53 
54 // Scan for null terminator
55 std::string str;
56 size_t maxLen = 256;
57 for (size_t i = 0; i < maxLen; ++i) {
58 if (fileOffset + i >= data_.size())
59 break;
60 char c = static_cast<char>(data_[fileOffset + i]);
61 if (c == 0)
62 break;
63 // Relaxed check: printable ASCII or common whitespace
64 if ((c < 32 || c >= 127) && c != '\n' && c != '\t' && c != '\r')
65 return std::nullopt; // Not a clean string
66 str += c;
67 }
68 if (str.length() >= 3) // Min length 3 to avoid noise
69 return str;
70 }
71 }
72 return std::nullopt;
73}
74 
75std::optional<SymbolInfo>
76SymbolAnalysis::getSymbol(uint64_t address) const {
77 if (db_) return db_->getSymbol(address);
78 return std::nullopt;
79}
80 
81bool SymbolAnalysis::isPLTStub(uint64_t address) const {
82 return pltEntries_.find(address) != pltEntries_.end();
83}
84 
85// ═══════════════════════════════════════════════════════════════════════════
86// ELF Parsing Logic
87// ═══════════════════════════════════════════════════════════════════════════
88 
89void SymbolAnalysis::parseELF() {
90 const auto *ehdr = reinterpret_cast<const Elf64_Ehdr *>(data_.data());
91 
92 if (ehdr->e_shoff == 0 || ehdr->e_shnum == 0)
93 return;
94 
95 parseSections(ehdr->e_shoff, ehdr->e_shnum, ehdr->e_shstrndx);
96}
97 
98void SymbolAnalysis::parseSections(uint64_t shoff, uint16_t shnum,
99 uint16_t shstrndx) {
100 if (shoff + shnum * sizeof(Elf64_Shdr) > data_.size())
101 return;
102 
103 const auto *shdrs =
104 reinterpret_cast<const Elf64_Shdr *>(data_.data() + shoff);
105 
106 // Get Section Header String Table
107 if (shstrndx >= shnum)
108 return;
109 const auto &strSec = shdrs[shstrndx];
110 if (strSec.sh_offset + strSec.sh_size > data_.size())
111 return;
112 const char *strTab =
113 reinterpret_cast<const char *>(data_.data() + strSec.sh_offset);
114 
115 // Scan sections
116 uint64_t dynSymOffset = 0, dynSymSize = 0, dynSymEnt = 0;
117 uint64_t dynStrOffset = 0;
118 uint64_t relaPltOffset = 0, relaPltSize = 0, relaPltEnt = 0;
119 uint64_t pltAddr = 0, pltSize = 0;
120 
121 for (int i = 0; i < shnum; ++i) {
122 const auto &sh = shdrs[i];
123 const char *name = strTab + sh.sh_name;
124 
125 // .dynsym
126 if (sh.sh_type == 11 /* SHT_DYNSYM */) { // or via name
127 dynSymOffset = sh.sh_offset;
128 dynSymSize = sh.sh_size;
129 dynSymEnt = sh.sh_entsize;
130 }
131 // .dynstr
132 else if (sh.sh_type == 3 /* SHT_STRTAB */ &&
133 std::strcmp(name, ".dynstr") == 0) {
134 dynStrOffset = sh.sh_offset;
135 }
136 // .rela.plt
137 else if (sh.sh_type == 4 /* SHT_RELA */ &&
138 std::strcmp(name, ".rela.plt") == 0) {
139 relaPltOffset = sh.sh_offset;
140 relaPltSize = sh.sh_size;
141 relaPltEnt = sh.sh_entsize;
142 }
143 // .plt
144 else if (std::strcmp(name, ".plt") == 0) {
145 pltAddr = sh.sh_addr;
146 pltSize = sh.sh_size;
147 }
148 // .rodata or .data
149 else if (std::strcmp(name, ".rodata") == 0 || std::strcmp(name, ".data") == 0) {
150 rodataSections_.push_back({sh.sh_addr, sh.sh_size, sh.sh_offset});
151 }
152 }
153 
154 // Parse Symbols if found
155 if (dynSymOffset && dynStrOffset) {
156 parseSymbols(dynSymOffset, dynStrOffset, dynSymSize, dynSymEnt);
157 }
158 
159 // Parse Relocations to identify PLT calls
160 if (relaPltOffset && dynSymOffset && dynStrOffset && pltAddr) {
161 parseRelocations(relaPltOffset, relaPltSize, relaPltEnt, dynStrOffset,
162 dynSymOffset, true);
163 }
164}
165 
166void SymbolAnalysis::parseSymbols(uint64_t symOffset, uint64_t strOffset,
167 uint64_t size, uint64_t entSize) {
168 if (entSize < sizeof(Elf64_Sym))
169 return;
170 
171 size_t count = size / entSize;
172 const uint8_t *ptr = data_.data() + symOffset;
173 
174 for (size_t i = 0; i < count; ++i) {
175 const auto *sym = reinterpret_cast<const Elf64_Sym *>(ptr + i * entSize);
176 uint8_t type = (sym->st_info & 0xf);
177 
178 if (sym->st_value != 0 && (type == 2 /* STT_FUNC */ || type == 1 /* STT_OBJECT */)) {
179 std::string name = readString(strOffset + sym->st_name);
180 if (!name.empty()) {
181 if (db_) {
182 SymbolType stype = (type == 2) ? SymbolType::Function : SymbolType::GlobalVariable;
183 db_->addSymbol(sym->st_value, name, stype, SymbolSource::Auto);
184 }
185 }
186 }
187 }
188}
189 
190void SymbolAnalysis::parseRelocations(uint64_t relOffset, uint64_t size,
191 uint64_t entSize, uint64_t dynStrOffset,
192 uint64_t dynSymOffset, bool isRela) {
193 if (entSize < sizeof(Elf64_Rela))
194 return;
195 
196 size_t count = size / entSize;
197 const uint8_t *ptr = data_.data() + relOffset;
198 const auto *syms =
199 reinterpret_cast<const Elf64_Sym *>(data_.data() + dynSymOffset);
200 
201 // We attempt to map PLT Stubs to Symbols.
202 // Assuming PLT starts at .plt address, and each entry is 16 bytes.
203 // PLT[0] is special. PLT[1] matches rela[0].
204 
205 // Find .plt address again? Passed via member or scan again?
206 // Let's assume we map GOT address (r_offset) to name?
207 // Use heuristic: We need to know who calls 'malloc'.
208 // The code calls 0x400560 (PLTStub). PLTStub jumps to *GOT_Entry.
209 // We want 0x400560 -> "malloc".
210 
211 // We don't easily know the PLT Stub address for a given relocation without
212 // parsing the PLT section instructions or assuming the 16-byte fixed layout.
213 // Let's try locating the .plt section header again.
214 
215 uint64_t pltAddr = 0;
216 // (Re-scan sections for .plt sh_addr - inefficient but ok for now)
217 // Actually we iterate sections in parseSections.
218 // We need to store it or accept a small hack.
219 const auto *ehdr = reinterpret_cast<const Elf64_Ehdr *>(data_.data());
220 const auto *shdrs =
221 reinterpret_cast<const Elf64_Shdr *>(data_.data() + ehdr->e_shoff);
222 const char *strTab = reinterpret_cast<const char *>(
223 data_.data() + shdrs[ehdr->e_shstrndx].sh_offset);
224 
225 for (int i = 0; i < ehdr->e_shnum; ++i) {
226 if (std::strcmp(strTab + shdrs[i].sh_name, ".plt") == 0) {
227 pltAddr = shdrs[i].sh_addr;
228 break;
229 }
230 }
231 
232 if (pltAddr == 0)
233 return;
234 
235 // Process Relocations
236 for (size_t i = 0; i < count; ++i) {
237 const auto *rela = reinterpret_cast<const Elf64_Rela *>(ptr + i * entSize);
238 uint32_t symIdx = (rela->r_info >> 32);
239 // uint32_t type = (rela->r_info & 0xffffffff);
240 
241 // Get Symbol Name
242 // (Bounds check ignored for brevity)
243 const auto &sym = syms[symIdx];
244 std::string name = readString(dynStrOffset + sym.st_name);
245 
246 if (!name.empty()) {
247 // Assume PLT stub index corresponds to relocation index
248 // PLT entries usually 16 bytes. First is PLT header (0).
249 uint64_t stubAddr = pltAddr + (i + 1) * 16;
250 
251 if (db_)
252 db_->addSymbol(stubAddr, name, SymbolType::Function,
253 SymbolSource::Auto);
254 }
255 }
256}
257 
258std::string SymbolAnalysis::readString(uint64_t offset) const {
259 if (offset >= data_.size())
260 return "";
261 return std::string(reinterpret_cast<const char *>(data_.data() + offset));
262}
263 
264} // namespace ShadPKG::Decompiler::Analysis
265