repr 0.1
Reconstructable string representations and more
Loading...
Searching...
No Matches
denoise.h
Go to the documentation of this file.
1#pragma once
2
9#include <cstddef>
10#include <set>
11#include <type_traits>
13#include <librepr/macro/util.h>
15
16namespace librepr::detail {
17namespace msvc {
18// For packing pos/len pairs
19using uhalfptr_t =
20 std::conditional_t<sizeof(void*) == 8,
22// Extended into the signed world
25
26struct SymToken {
29 explicit LIBREPR_HINT_INLINE operator bool() const {
30 return (loc >= 0);
31 }
32};
33
34struct SymLexer {
35protected:
37 : begin(sym.data()), cursor(begin),
38 buf(sym), nest_depth(), current_lexeme(), last_lexeme() {
39 this->end = begin + sym.size();
40 }
41
42 static LIBREPR_HINT_INLINE bool isWord(char c) {
43 return (c >= 'A' && c <= 'Z') ||
44 (c >= 'a' && c <= 'z') ||
45 (c == '_');
46 }
47
49 return isWord(c) || (c == ':') || (c >= '0' && c <= '9');
50 }
51
53 last_lexeme = current_lexeme;
54 cursor += last_lexeme.length;
55 auto pos = advance();
56 if(this->cursor == end) [[unlikely]] {
57 return {};
58 } else if(isWord(*cursor)) {
59 auto epos = find_nonword();
60 auto len = uhalfptr_t(epos - pos);
61 this->current_lexeme = SymToken{pos, len};
62 } else {
63 this->current_lexeme = SymToken{pos, 1};
64 }
65 return this->current_lexeme;
66 }
67
69 return this->current_lexeme;
70 }
71
73 return this->last_lexeme;
74 }
75
77 return this->begin[current_lexeme.loc];
78 }
79
81 if(last_lexeme.loc < 0) [[unlikely]] {
82 return '\0';
83 }
84 return this->begin[last_lexeme.loc];
85 }
86
88 auto pos = find_next(cursor + current_lexeme.length);
89 if(pos == -1) [[unlikely]] {
90 return '\0';
91 }
92 return this->begin[pos];
93 }
94
98
100 auto* lloc = begin + tok.loc;
101 LIBREPR_ASSERT(tok.length > 0,
102 "Invalid token length {}.", tok.length);
103 return {lloc, lloc + tok.length};
104 }
105
106private:
107 ihalfptr_t advance() {
108 auto pos = find_next();
109 if(pos != -1) {
110 cursor = begin + pos;
111 } else {
112 cursor = end;
113 }
114 return ihalfptr_t(pos);
115 }
116
117 // Locates the next offset from begin
118 std::intptr_t find_next(
119 const char* pos = nullptr) const {
120 auto* lloc = (!pos) ? cursor : pos;
121 if(lloc == end) return -1;
122 LIBREPR_ASSERT((lloc >= begin && lloc < end),
123 "Invalid position!");
124 for(; lloc < end; ++lloc) {
125 char c = *lloc;
126 if(c != ' ') {
127 return std::intptr_t(lloc - begin);
128 }
129 }
130 return -1;
131 }
132
133 // Locates the next offset from begin
134 std::intptr_t find_nonword() const {
135 auto* lloc = cursor;
136 for(; lloc < end; ++lloc) {
138 return std::intptr_t(lloc - begin);
139 }
140 }
141 return std::intptr_t(end - begin);
142 }
143
144private:
145 const char* begin;
146 const char* cursor;
147 const char* end;
148 SymToken current_lexeme;
149 SymToken last_lexeme;
150
151protected:
154};
155
161
162 void parse() {
163 while(auto tok = SymLexer::next()) {
164 if(char c = SymLexer::current_first();
165 !SymLexer::isWord(c))
166 {
167 this->handle_symbol(c);
168 buf.write(c);
169 } else if(!is_keyword(tok)) {
170 this->handle_ident(tok);
171 if(this->is_replacement(tok)) [[unlikely]] {
172 continue;
173 }
175 }
176 }
177 }
178
180 return std::move(SymLexer::buf.extract());
181 }
182
183private:
184 LIBREPR_HINT_INLINE char last_written() {
185 if(buf->empty()) [[unlikely]] {
186 return '\0';
187 }
188 return buf->back();
189 }
190
191 LIBREPR_HINT_INLINE void handle_symbol(char c) {
192 char lc = SymLexer::last_first();
193 if(c == '>') {
194 if(lc == '>') {
195 buf.write(' ');
196 }
199 "Invalid nest level {}.", SymLexer::nest_depth);
200 } else if(c == '<') {
202 }
203 }
204
205 LIBREPR_HINT_INLINE void handle_ident(SymToken) {
206 char lc = last_written();
208 buf.write(' ');
209 } else if(SymLexer::nest_depth > 0) {
210 if(lc == ',') buf.write(' ');
211 }
212 }
213
214 bool is_keyword(SymToken tok) {
216 "struct", "class", "union", "enum",
217 "__ptr64", "__cdecl", "__stdcall",
218 "__thiscall", "__fastcall", ""
219 };
220 auto tstr = SymLexer::to_sv(tok);
221 return keywords.contains(tstr);
222 }
223
224 bool is_replacement(SymToken tok) {
225 auto tstr = SymLexer::to_sv(tok);
226 if(tstr == "std::nullptr_t") {
227 buf.write("decltype(nullptr)");
228 return true;
229 } else if(tstr == "__int64") {
230 buf.write("long long");
231 return true;
232 }
233 return false;
234 }
235};
236
237} // namespace msvc
238
242 parser.parse();
243 return std::move(parser.extract());
244}
245} // namespace librepr::detail
246
247static_assert(sizeof(void*) == sizeof(librepr::detail::msvc::SymToken));
#define LIBREPR_ASSERT(cond,...)
Checked assertion, for constraint enforcement.
Definition assert.h:67
T end(T... args)
#define LIBREPR_HINT_INLINE
Definition macro/util.h:56
std::conditional_t< sizeof(void *)==8, std::uint32_t, std::uint16_t > uhalfptr_t
Definition denoise.h:21
std::make_signed_t< uhalfptr_t > ihalfptr_t
Definition denoise.h:23
Definition assert.h:89
LIBREPR_HINT_INLINE std::string denoise_name(std::string_view name)
Formats undecorated symbols to match Itanium's symbols.
Definition denoise.h:240
std::string code_for()
Definition repr:39
Wrapper around std::string.
Definition buffer.h:14
void write(const char *beg, const char *end)
Definition buffer.h:23
Definition denoise.h:34
static LIBREPR_HINT_INLINE bool isWord(char c)
Definition denoise.h:42
LIBREPR_HINT_INLINE char current_first() const noexcept
Definition denoise.h:76
SymToken next() noexcept
Definition denoise.h:52
SymLexer(std::string_view sym)
Definition denoise.h:36
char peek_first() const noexcept
Definition denoise.h:87
LIBREPR_HINT_INLINE void write(SymToken tok)
Definition denoise.h:95
LIBREPR_HINT_INLINE char last_first() const noexcept
Definition denoise.h:80
LIBREPR_HINT_INLINE SymToken current() const noexcept
Definition denoise.h:68
SymBuffer buf
Definition denoise.h:152
ihalfptr_t nest_depth
Definition denoise.h:153
LIBREPR_HINT_INLINE SymToken last() const noexcept
Definition denoise.h:72
static LIBREPR_HINT_INLINE bool isQualifiedName(char c)
Definition denoise.h:48
std::string_view to_sv(SymToken tok) const
Definition denoise.h:99
Does the actual formatting.
Definition denoise.h:157
SymParser(std::string_view sym)
Definition denoise.h:158
std::string && extract()
Definition denoise.h:179
void parse()
Definition denoise.h:162
Definition denoise.h:26
ihalfptr_t loc
Definition denoise.h:27
uhalfptr_t length
Definition denoise.h:28