I have known about std::variant for a while but had dismissed it as not very useful (I have revised that opinion now). But I would like a second opinion on my usage to see if I can improve this.
The original class looked like this:
namespace ThorsAnvil::Serialize
{

/*
 * Abstract parser front end (original, stream-only version).
 *
 * The pure virtual functions are overridden by the format specific parsers
 * (Json/Yaml/Bson). Those parsers never touch the stream directly; they go
 * through the protected helpers below, each of which forwards to the
 * std::istream passed to the constructor.
 *
 * The stream is held by reference, so it must outlive this object.
 */
class ParserInterface
{
    public:
        ParserInterface(std::istream& stream)
            : input(stream)
        {}
        virtual ~ParserInterface() = default;

        // Token level interface implemented by each format.
        virtual ParserToken     getNextToken()                      = 0;
        virtual std::string     getKey()                            = 0;

        // One overload per supported value type.
        virtual void            getValue(short int&)                = 0;
        virtual void            getValue(int&)                      = 0;
        virtual void            getValue(long int&)                 = 0;
        virtual void            getValue(long long int&)            = 0;

        virtual void            getValue(unsigned short int&)       = 0;
        virtual void            getValue(unsigned int&)             = 0;
        virtual void            getValue(unsigned long int&)        = 0;
        virtual void            getValue(unsigned long long int&)   = 0;

        virtual void            getValue(float&)                    = 0;
        virtual void            getValue(double&)                   = 0;
        virtual void            getValue(long double&)              = 0;

        virtual void            getValue(bool&)                     = 0;
        virtual void            getValue(std::string&)              = 0;

        virtual bool            isValueNull()                       = 0;
        virtual std::string     getRawValue()                       = 0;

    protected:
        // Unformatted read of `size` bytes into `dst`.
        // Returns false when the stream is in a failed state afterwards.
        bool read(char* dst, std::size_t size)
        {
            input.read(dst, size);
            return !input.fail();
        }

        // Reads up to (and consumes) `delim` into `dst`.
        // Returns false when the stream is in a failed state afterwards.
        bool readTo(std::string& dst, char delim)
        {
            std::getline(input, dst, delim);
            return !input.fail();
        }

        // Number of characters extracted by the last unformatted read.
        std::size_t lastReadCount() const
        {
            auto const extracted = input.gcount();
            return extracted;
        }

        // Current read position of the stream.
        std::streampos getPos()
        {
            std::streampos const current = input.tellg();
            return current;
        }

        // Extracts one character (or the stream's EOF value).
        int get()
        {
            int const next = input.get();
            return next;
        }

        // Looks at the next character without extracting it.
        int peek()
        {
            int const next = input.peek();
            return next;
        }

        // Skips `size` characters.
        void ignore(std::size_t size)   {input.ignore(size);}

        // Resets the stream's error flags.
        void clear()                    {input.clear();}

        // Puts the most recently extracted character back.
        void unget()                    {input.unget();}

        // True while neither failbit nor badbit is set.
        bool ok() const
        {
            return static_cast<bool>(input);
        }

        // Forces the stream into the failed state.
        void setFail()
        {
            input.setstate(std::ios_base::failbit);
        }

        // Formatted extraction of `value`; returns whether it succeeded.
        template<typename T>
        bool readValue(T& value)
        {
            auto&& result = (input >> value);
            return static_cast<bool>(result);
        }

    private:
        std::istream&   input;
};

}
I then have implementations for Json/Yaml/Bson that override the virtual functions to provide the actual implementation. But all these implementations use the protected methods to read from the stream.
I did some experiments and found that this could be much more efficient with a std::string as the underlying storage. I know I can move a std::string into a std::stringstream and simply use the code above, but the overhead of the string stream is very high. So I decided I wanted to modify the class to support strings directly.
This is where std::variant comes in. Without changing anything (drastically) I made the input type std::variant<std::istream&, StringInput>. Note: I had to change std::istream& to std::istream* as std::variant does not support references (and I could not move the stream). Also StringInput is a very basic wrapper around std::string_view that simply supports the methods I need.
Thus the class I would like reviewed is:
namespace ThorsAnvil::Serialize
{
class ParserInterface
{
public:
// Added a new constructor
// So it can take a string in addition to a stream.
ParserInterface(std::string_view const& str)
: input(str)
{}
ParserInterface(std::istream& stream, ParserConfig config = ParserConfig{})
: input(&stream)
{}
virtual ~ParserInterface() {}
virtual ParserToken getNextToken() = 0;
virtual std::string getKey() = 0;
virtual void getValue(short int&) = 0;
virtual void getValue(int&) = 0;
virtual void getValue(long int&) = 0;
virtual void getValue(long long int&) = 0;
virtual void getValue(unsigned short int&) = 0;
virtual void getValue(unsigned int&) = 0;
virtual void getValue(unsigned long int&) = 0;
virtual void getValue(unsigned long long int&)= 0;
virtual void getValue(float&) = 0;
virtual void getValue(double&) = 0;
virtual void getValue(long double&) = 0;
virtual void getValue(bool&) = 0;
virtual void getValue(std::string&) = 0;
virtual bool isValueNull() = 0;
virtual std::string getRawValue() = 0;
protected:
// The protected functions are all modified to read from
// either a string or stream using the `std::visit()` method.
bool read(char* dst, std::size_t size)
{
struct Read
{
char* dst;
std::size_t size;
Read(char* dst, std::size_t size):dst(dst),size(size){}
bool operator()(std::istream* input) {return static_cast<bool>(input->read(dst, size));}
bool operator()(StringInput& input) {return input.read(dst, size);}
};
return std::visit(Read{dst, size}, input);
}
bool readTo(std::string& dst, char delim)
{
struct ReadTo
{
std::string& dst;
char delim;
ReadTo(std::string& dst, char delim):dst(dst),delim(delim){}
bool operator()(std::istream* input) {return static_cast<bool>(std::getline((*input), dst, delim));}
bool operator()(StringInput& input) {return input.readTo(dst, delim);}
};
return std::visit(ReadTo(dst, delim), input);
}
std::size_t lastReadCount() const
{
struct LastReadCount
{
std::size_t operator()(std::istream const* input) {return input->gcount();}
std::size_t operator()(StringInput const& input) {return input.getLastReadCount();}
};
return std::visit(LastReadCount{}, input);
}
std::streampos getPos()
{
struct GetPos
{
std::streampos operator()(std::istream* input) {return input->tellg();}
std::streampos operator()(StringInput& input) {return input.getPos();}
};
return std::visit(GetPos{}, input);
}
int get()
{
struct Get
{
int operator()(std::istream* input) {return input->get();}
int operator()(StringInput& input) {return input.get();}
};
return std::visit(Get{}, input);
}
int peek()
{
struct Peek
{
int operator()(std::istream* input) {return input->peek();}
int operator()(StringInput& input) {return input.peek();}
};
return std::visit(Peek{}, input);
}
void ignore(std::size_t size)
{
struct Ignore
{
std::size_t size;
Ignore(std::size_t size): size(size) {}
void operator()(std::istream* input) {input->ignore(size);}
void operator()(StringInput& input) {input.ignore(size);}
};
std::visit(Ignore{size}, input);
}
void clear()
{
struct Clear
{
void operator()(std::istream* input) {input->clear();}
void operator()(StringInput& input) {input.clear();}
};
std::visit(Clear{}, input);
}
void unget()
{
struct Unget
{
void operator()(std::istream* input) {input->unget();}
void operator()(StringInput& input) {input.unget();}
};
std::visit(Unget{}, input);
}
bool ok() const
{
struct OK
{
bool operator()(std::istream const* input) {return !input->fail();}
bool operator()(StringInput const& input) {return input.isOk();}
};
return std::visit(OK{}, input);
}
void setFail()
{
struct SetFail
{
void operator()(std::istream* input) {input->setstate(std::ios::failbit);}
void operator()(StringInput& input) {input.setFail();}
};
std::visit(SetFail{}, input);
}
template<typename T>
bool readValue(T& value)
{
struct ReadValue
{
T& value;
ReadValue(T& value) :value(value) {}
bool operator()(std::istream* input) {return static_cast<bool>((*input) >> value);}
bool operator()(StringInput& input) {input.readValue(value);return true;}
};
return std::visit(ReadValue{value}, input);
}
private:
using DataInputStream = std::variant<std::istream*, StringInput>;
DataInputStream input;
};
}
For completeness, the StringInput class:
namespace ThorsAnvil::Serialize
{

/*
 * Minimal cursor over a std::string_view offering the subset of the
 * std::istream operations the parsers need.
 *
 * State:
 *   data       the characters being read (a view: not owned).
 *   position   offset of the next character to read.
 *              `position > data.size()` encodes the failed state.
 *   lastRead   number of characters transferred by the last read() or
 *              numeric readValue().
 *
 * No member indexes `data` at or beyond data.size(): running off the end
 * yields the EOF value and/or the failed state instead of reading out of
 * bounds.
 */
struct StringInput
{
    std::string_view    data;
    std::size_t         position;
    std::size_t         lastRead;

    // Left implicit on purpose.
    // NOTE(review): code that initialises a std::variant alternative
    // straight from a string_view relies on this conversion.
    StringInput(std::string_view const& view)
        : data(view)
        , position(0)
        , lastRead(0)
    {}

    // Copies up to `size` characters into `dst`.
    // Returns true only if all `size` characters were available. A short
    // read copies what is left, records the count in `lastRead`, enters
    // the failed state and returns false (std::istream::read reports a
    // short read the same way).
    bool read(char* dst, std::size_t size)
    {
        if (!isOk()) {
            // Already failed: `data.size() - position` would underflow.
            lastRead = 0;
            return false;
        }
        std::size_t const copySize = std::min(size, data.size() - position);
        std::copy_n(data.data() + position, copySize, dst);
        position += copySize;
        lastRead  = copySize;
        if (copySize != size) {
            setFail();
            return false;
        }
        return true;
    }

    // Reads characters up to `delim` into `dst`; the delimiter itself is
    // consumed but not stored.
    // Returns true if a delimiter was found or at least one character was
    // extracted, so a final segment without a trailing delimiter is still
    // reported as read. When nothing at all could be extracted the failed
    // state is entered and false is returned. In the failed state `dst`
    // is left untouched.
    bool readTo(std::string& dst, char delim)
    {
        if (!isOk()) {
            return false;
        }
        auto const found    = data.find(delim, position);
        bool const hitDelim = (found != std::string_view::npos);
        auto const stop     = hitDelim ? found : data.size();
        bool const gotData  = hitDelim || stop != position;

        dst.assign(data.data() + position, stop - position);
        // Only step over a delimiter that is really there.
        position = hitDelim ? stop + 1 : stop;
        if (!gotData) {
            setFail();
        }
        return gotData;
    }

    std::size_t     getLastReadCount() const    {return lastRead;}
    std::streampos  getPos()                    {return position;}

    // Extracts one character and returns it as an unsigned char value
    // (0..255), so that byte 0xFF cannot be mistaken for EOF. At the end
    // of the data it returns the EOF value and enters the failed state.
    int get()
    {
        if (position >= data.size()) {
            setFail();
            return std::char_traits<char>::eof();
        }
        return static_cast<unsigned char>(data[position++]);
    }

    // Like get() but neither consumes the character nor changes state.
    int peek()
    {
        if (position >= data.size()) {
            return std::char_traits<char>::eof();
        }
        return static_cast<unsigned char>(data[position]);
    }

    // Skips `size` characters. Skipping more than what is left enters the
    // failed state (and cannot wrap the offset around).
    void ignore(std::size_t size)
    {
        if (isOk() && size <= data.size() - position) {
            position += size;
        }
        else {
            setFail();
        }
    }

    // Leaves the failed state without rewinding: the cursor is placed at
    // the end of the data, which is where it is after running off the
    // end. Does nothing when not failed.
    // Limitation: the offset in effect before an explicit setFail() is
    // not remembered, because the failed state is encoded in `position`.
    void clear()
    {
        if (!isOk()) {
            position = data.size();
        }
    }

    // Steps back one character.
    void unget()            {--position;}
    bool isOk() const       {return position <= data.size();}
    void setFail()          {position = data.size() + 1;}

    // Parses a number starting at the current position using from_chars.
    // Unless from_chars reports invalid_argument, the consumed characters
    // are skipped and their count stored in `lastRead`. On
    // invalid_argument, or when already failed, nothing is changed.
    template<typename T>
    void readValue(T& value)
    {
        using std::from_chars;
#if defined(NO_STD_SUPPORT_FROM_CHAR_DOUBLE) && (NO_STD_SUPPORT_FROM_CHAR_DOUBLE >= 1)
        using fast_float::from_chars;
#endif
        if (!isOk()) {
            return;
        }
        // Pointer arithmetic on data() rather than &data[i]: indexing a
        // string_view at size() is out of bounds.
        char const* const start  = data.data() + position;
        char const* const finish = data.data() + data.size();

        auto const result = from_chars(start, finish, value);
        if (result.ec != std::errc::invalid_argument)
        {
            lastRead  = static_cast<std::size_t>(result.ptr - start);
            position += lastRead;
        }
    }

    // Skips leading white space and reads the next character. At the end
    // of the data `value` is set to -1 and the failed state is entered.
    void readValue(char& value)
    {
        // std::isspace requires a value representable as unsigned char.
        while (position < data.size() && std::isspace(static_cast<unsigned char>(data[position]))) {
            ++position;
        }
        if (position < data.size()) {
            value = data[position++];
        }
        else {
            value = -1;
            setFail();
        }
    }
};

}
std::variant can store references if you use std::reference_wrapper. But if you are already working with std::istreams, then to read from a string the caller can just make a std::istringstream or, since C++23, make a std::ispanstream for an existing std::string.
… std::istringstream. But I am optimizing for performance. The stream interface has an excessively high overhead. By adding the StringInput class I have increased throughput by a factor of 4.
reference_wrapper is really just a pointer. Might as well just store the pointer.