repr 0.1
Reconstructable string representations and more
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages Concepts
denoise.h
Go to the documentation of this file.
#pragma once
#include <cstddef>
#include <set>
#include <type_traits>
namespace librepr::detail {
namespace msvc {
// For packing pos/len pairs
using uhalfptr_t =
std::conditional_t<sizeof(void*) == 8,
// Extended into the signed world
struct SymToken {
// A token converts to true when its location is not negative.
explicit LIBREPR_HINT_INLINE operator bool() const {
  return !(loc < 0);
}
};
struct SymLexer {
protected:
: begin(sym.data()), cursor(begin),
buf(sym), nest_depth(), current_lexeme(), last_lexeme() {
this->end = begin + sym.size();
}
// True for ASCII letters and the underscore; digits are not accepted here.
static LIBREPR_HINT_INLINE bool isWord(char c) {
  if(c == '_') {
    return true;
  }
  const bool is_upper = ('A' <= c && c <= 'Z');
  const bool is_lower = ('a' <= c && c <= 'z');
  return is_upper || is_lower;
}
// True for any character accepted by isWord(), plus ':' and the
// ASCII digits '0'..'9'.
static LIBREPR_HINT_INLINE bool isQualifiedName(char c) {
  const bool is_digit = ('0' <= c && c <= '9');
  const bool is_colon = (c == ':');
  return is_digit || is_colon || isWord(c);
}
last_lexeme = current_lexeme;
cursor += last_lexeme.length;
auto pos = advance();
if(this->cursor == end) [[unlikely]] {
return {};
} else if(isWord(*cursor)) {
auto epos = find_nonword();
auto len = uhalfptr_t(epos - pos);
this->current_lexeme = SymToken{pos, len};
} else {
this->current_lexeme = SymToken{pos, 1};
}
return this->current_lexeme;
}
return this->current_lexeme;
}
return this->last_lexeme;
}
return this->begin[current_lexeme.loc];
}
if(last_lexeme.loc < 0) [[unlikely]] {
return '\0';
}
return this->begin[last_lexeme.loc];
}
auto pos = find_next(cursor + current_lexeme.length);
if(pos == -1) [[unlikely]] {
return '\0';
}
return this->begin[pos];
}
auto* lloc = begin + tok.loc;
LIBREPR_ASSERT(tok.length > 0,
"Invalid token length {}.", tok.length);
return {lloc, lloc + tok.length};
}
private:
// Repositions `cursor` using find_next(): onto the reported offset
// from `begin`, or onto `end` when find_next() reports -1.
// Returns find_next()'s result converted to ihalfptr_t.
ihalfptr_t advance() {
  const auto offset = find_next();
  cursor = (offset == -1) ? end : (begin + offset);
  return ihalfptr_t(offset);
}
// Returns the offset from `begin` of the first character that is not
// a space, scanning from `pos` (or from `cursor` when `pos` is null).
// Returns -1 when the scan starts at `end` or only spaces remain.
std::intptr_t find_next(
 const char* pos = nullptr) const {
  const char* lloc = pos ? pos : cursor;
  if(lloc == end) return -1;
  LIBREPR_ASSERT((lloc >= begin && lloc < end),
    "Invalid position!");
  // Skip over the run of spaces, never stepping past `end`.
  while(lloc < end && *lloc == ' ') {
    ++lloc;
  }
  if(lloc >= end) {
    return -1;
  }
  return std::intptr_t(lloc - begin);
}
// Locates the offset from begin where the word starting at cursor stops
// (end - begin when it runs to the end of the input)
std::intptr_t find_nonword() const {
auto* lloc = cursor;
for(; lloc < end; ++lloc) {
return std::intptr_t(lloc - begin);
}
}
return std::intptr_t(end - begin);
}
private:
const char* begin;
const char* cursor;
const char* end;
SymToken current_lexeme;
SymToken last_lexeme;
protected:
};
void parse() {
while(auto tok = SymLexer::next()) {
if(char c = SymLexer::current_first();
{
this->handle_symbol(c);
buf.write(c);
} else if(!is_keyword(tok)) {
this->handle_ident(tok);
if(this->is_replacement(tok)) [[unlikely]] {
continue;
}
}
}
}
return std::move(SymLexer::buf.extract());
}
private:
// Returns the last character held by `buf`, or '\0' when `buf`
// reports that it is empty.
LIBREPR_HINT_INLINE char last_written() {
  if(!buf->empty()) [[likely]] {
    return buf->back();
  }
  return '\0';
}
LIBREPR_HINT_INLINE void handle_symbol(char c) {
if(c == '>') {
if(lc == '>') {
buf.write(' ');
}
"Invalid nest level {}.", SymLexer::nest_depth);
} else if(c == '<') {
}
}
LIBREPR_HINT_INLINE void handle_ident(SymToken) {
char lc = last_written();
buf.write(' ');
} else if(SymLexer::nest_depth > 0) {
if(lc == ',') buf.write(' ');
}
}
bool is_keyword(SymToken tok) {
"struct", "class", "union", "enum",
"__ptr64", "__cdecl", "__stdcall",
"__thiscall", "__fastcall", ""
};
return keywords.contains(tstr);
}
// Rewrites selected type spellings while emitting output.
// When `tstr` equals "std::nullptr_t" or "__int64", the substitute
// text ("decltype(nullptr)" / "long long") is written to `buf` and
// true is returned; otherwise nothing is written and false is returned.
// NOTE(review): `tstr` is not declared in the visible text of this
// function; presumably it is the string form of `tok` -- confirm.
bool is_replacement(SymToken tok) {
if(tstr == "std::nullptr_t") {
buf.write("decltype(nullptr)");
return true;
} else if(tstr == "__int64") {
buf.write("long long");
return true;
}
// No substitution applies to this token.
return false;
}
};
} // namespace msvc
parser.parse();
return std::move(parser.extract());
}
} // namespace librepr::detail
// SymToken must be exactly the size of a pointer.
static_assert(sizeof(librepr::detail::msvc::SymToken) == sizeof(void*));