lib/nbt: Implement parser

Soda
BodgeMaster 2022-10-15 23:05:26 +02:00
parent e9bfb6eeee
commit cdc23e7468
2 changed files with 366 additions and 0 deletions

View File

@ -1168,6 +1168,371 @@ namespace NBT {
}
// Parses the payload of a list tag: one byte naming the contained tag
// type, a 32 bit element count, and then that many headerless (unnamed)
// element payloads.
//
// Parameters:
//   data, dataSize     buffer containing the serialized NBT data and its
//                      length in bytes
//   initialPosition    index of the contained-type byte of the list
//   processedDataSize  receives the number of bytes consumed, counted from
//                      initialPosition; may be nullptr if the caller does
//                      not need it
//
// Returns the parsed elements as heap-allocated tags with empty names.
// Error codes reported by the helper functions are passed up unchanged;
// an unknown contained type yields ErrorCodes::INVALID_TYPE. Every tag
// created before an error occurred is freed again before returning.
ErrorOr<std::vector<Tag::Generic*>> deserializeRawListContents(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize) {
    using Result = ErrorOr<std::vector<Tag::Generic*>>;

    std::vector<Tag::Generic*> contents;

    // let callers that don't care about the consumed size pass nullptr
    uint64_t unusedProcessedDataSize = 0;
    if (processedDataSize == nullptr) {
        processedDataSize = &unusedProcessedDataSize;
    }

    // Frees everything parsed so far. Has to run before every error
    // return, otherwise the already created tags would be leaked.
    auto discardContents = [&contents]() {
        for (Tag::Generic* tag: contents) {
            delete tag;
        }
        contents.clear();
    };

    // get contained data length by reading it manually because
    // the function that does it normally can't deal with
    // headerless tags
    //
    // add one byte to position to skip the type byte
    ErrorOr<int32_t> elementCount = Helper::readInt32(data, dataSize, initialPosition+1);
    if (elementCount.isError) {
        return Result(true, elementCount.errorCode);
    }
    uint8_t contentType = data[initialPosition];
    // contained type byte + 4 length bytes = 5
    *processedDataSize = 5;

    // A zero or negative element count simply results in an empty list
    // because none of the loops below execute in that case.
    switch (contentType) {
        case TagType::END: {
            // everything except content has been touched at this point
            // and a list of end tags has no content that could be read
            for (int32_t i=0; i<elementCount.value; i++) {
                contents.push_back(new Tag::End());
            }
            break;
        }
        case TagType::INT8: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<int8_t> nextInt = Helper::readInt8(data, dataSize, initialPosition+*processedDataSize);
                if (nextInt.isError) {
                    discardContents();
                    return Result(true, nextInt.errorCode);
                }
                contents.push_back(new Tag::Int8("", nextInt.value));
                *processedDataSize += 1;
            }
            break;
        }
        case TagType::INT16: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<int16_t> nextInt = Helper::readInt16(data, dataSize, initialPosition+*processedDataSize);
                if (nextInt.isError) {
                    discardContents();
                    return Result(true, nextInt.errorCode);
                }
                contents.push_back(new Tag::Int16("", nextInt.value));
                *processedDataSize += 2;
            }
            break;
        }
        case TagType::INT32: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<int32_t> nextInt = Helper::readInt32(data, dataSize, initialPosition+*processedDataSize);
                if (nextInt.isError) {
                    discardContents();
                    return Result(true, nextInt.errorCode);
                }
                contents.push_back(new Tag::Int32("", nextInt.value));
                *processedDataSize += 4;
            }
            break;
        }
        case TagType::FLOAT: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<float> nextFloat = Helper::readFloat(data, dataSize, initialPosition+*processedDataSize);
                if (nextFloat.isError) {
                    discardContents();
                    return Result(true, nextFloat.errorCode);
                }
                contents.push_back(new Tag::Float("", nextFloat.value));
                *processedDataSize += 4;
            }
            break;
        }
        case TagType::INT64: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<int64_t> nextInt = Helper::readInt64(data, dataSize, initialPosition+*processedDataSize);
                if (nextInt.isError) {
                    discardContents();
                    return Result(true, nextInt.errorCode);
                }
                contents.push_back(new Tag::Int64("", nextInt.value));
                *processedDataSize += 8;
            }
            break;
        }
        case TagType::DOUBLE: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<double> nextDouble = Helper::readDouble(data, dataSize, initialPosition+*processedDataSize);
                if (nextDouble.isError) {
                    discardContents();
                    return Result(true, nextDouble.errorCode);
                }
                contents.push_back(new Tag::Double("", nextDouble.value));
                *processedDataSize += 8;
            }
            break;
        }
        case TagType::INT8_ARRAY: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<std::vector<int8_t>> nextArray = Helper::readInt8Array(data, dataSize, initialPosition+*processedDataSize);
                if (nextArray.isError) {
                    discardContents();
                    return Result(true, nextArray.errorCode);
                }
                contents.push_back(new Tag::Int8Array("", nextArray.value));
                // 4 byte length prefix + one byte per element
                *processedDataSize += static_cast<uint64_t>(nextArray.value.size()) + 4;
            }
            break;
        }
        case TagType::STRING: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<tiny_utf8::string> nextString = Helper::readString(data, dataSize, initialPosition+*processedDataSize);
                if (nextString.isError) {
                    discardContents();
                    return Result(true, nextString.errorCode);
                }
                contents.push_back(new Tag::String("", nextString.value));
                // this cannot be an error because it just got read
                //
                // The length prefix is an unsigned 16 bit value. Going
                // through uint16_t keeps lengths above 32767 from being
                // sign-extended into a huge offset.
                uint16_t nextStringSize = static_cast<uint16_t>(Helper::readInt16(data, dataSize, initialPosition+*processedDataSize).value);
                // 2 byte length prefix + string data
                *processedDataSize += static_cast<uint64_t>(nextStringSize) + 2;
            }
            break;
        }
        case TagType::LIST: {
            for (int32_t i=0; i<elementCount.value; i++) {
                uint64_t containedDataSize = 0;
                ErrorOr<std::vector<Tag::Generic*>> nextListContents = deserializeRawListContents(data, dataSize, initialPosition+*processedDataSize, &containedDataSize);
                if (nextListContents.isError) {
                    discardContents();
                    return Result(true, nextListContents.errorCode);
                }
                contents.push_back(new Tag::List("", nextListContents.value));
                *processedDataSize += containedDataSize;
            }
            break;
        }
        case TagType::COMPOUND: {
            for (int32_t i=0; i<elementCount.value; i++) {
                uint64_t containedDataSize = 0;
                ErrorOr<std::vector<Tag::Generic*>> nextCompoundData = deserialize(data, dataSize, initialPosition+*processedDataSize, &containedDataSize);
                if (nextCompoundData.isError) {
                    discardContents();
                    return Result(true, nextCompoundData.errorCode);
                }
                contents.push_back(new Tag::Compound("", nextCompoundData.value));
                *processedDataSize += containedDataSize;
            }
            break;
        }
        case TagType::INT32_ARRAY: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<std::vector<int32_t>> nextArray = Helper::readInt32Array(data, dataSize, initialPosition+*processedDataSize);
                if (nextArray.isError) {
                    discardContents();
                    return Result(true, nextArray.errorCode);
                }
                contents.push_back(new Tag::Int32Array("", nextArray.value));
                // 4 byte length prefix + four bytes per element
                *processedDataSize += static_cast<uint64_t>(nextArray.value.size()) * 4 + 4;
            }
            break;
        }
        case TagType::INT64_ARRAY: {
            for (int32_t i=0; i<elementCount.value; i++) {
                ErrorOr<std::vector<int64_t>> nextArray = Helper::readInt64Array(data, dataSize, initialPosition+*processedDataSize);
                if (nextArray.isError) {
                    discardContents();
                    return Result(true, nextArray.errorCode);
                }
                contents.push_back(new Tag::Int64Array("", nextArray.value));
                // 4 byte length prefix + eight bytes per element
                *processedDataSize += static_cast<uint64_t>(nextArray.value.size()) * 8 + 4;
            }
            break;
        }
        default:
            // nothing has been allocated yet at this point
            return Result(true, ErrorCodes::INVALID_TYPE);
    }
    return Result(contents);
}
// Parses serialized NBT data into a flat vector of heap-allocated tags.
//
// Parameters:
//   data, dataSize     buffer containing the serialized NBT data and its
//                      length in bytes
//   initialPosition    index of the first tag header to parse
//   processedDataSize  if not nullptr, receives the number of bytes
//                      consumed, counted from initialPosition
//
// Parsing stops at the end of the data or right after the first end tag
// encountered on this level (which is how nested compounds terminate).
// Error codes reported by the helper functions are passed up unchanged.
// When an error is returned, all tags created by this call have been
// freed again.
//
// The return value of this function is a vector of tags
// instead of a compound tag due to a spec extension that allows
// for any bare tag to be valid NBT data without a containing
// compound tag. This also just makes the implementation easier.
ErrorOr<std::vector<Tag::Generic*>> deserialize(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize){
    using Result = ErrorOr<std::vector<Tag::Generic*>>;

    if (initialPosition >= dataSize) {
        // An interesting question at this point is whether we should
        // consider empty input valid or invalid NBT data.
        //
        // The original spec says that the top-most tag is always a
        // compound (or in more recent times, the Microsoft-commercialized
        // in-game-purchase-enabling version also allows list tags)
        // which automatically means that no data is invalid data...
        // I don't see a reason why having a different tag as the top-most
        // tag shouldn't be valid NBT in which case we have to face the
        // question whether no data is invalid or just empty NBT data.
        //
        // This seems like a reasonable extension to the spec to me and
        // it should be backwards compatible AFAIK.
        //
        // - BodgeMaster
        if (processedDataSize!=nullptr) *processedDataSize=0;
        return Result(true, ErrorCodes::OUT_OF_RANGE);
    }

    std::vector<Tag::Generic*> tags;
    uint64_t currentPosition = initialPosition;

    // Single exit path: reports the consumed size and, if the result is
    // an error, frees the tags that were created before the error.
    auto finish = [&](const Result& result) -> Result {
        if (processedDataSize!=nullptr) {
            *processedDataSize = currentPosition-initialPosition;
        }
        if (result.isError) {
            for (Tag::Generic* tag: tags) {
                delete tag;
            }
        }
        return result;
    };

    while (currentPosition<dataSize) {
        ErrorOr<uint64_t> nextTagSize = Helper::totalTagSize(data, dataSize, currentPosition);
        if (nextTagSize.isError) {
            if (nextTagSize.errorCode != ErrorCodes::NOT_YET_KNOWN) {
                return finish(Result(true, nextTagSize.errorCode));
            }
            // The size is not known in advance, so the contents have to
            // be parsed to find out where the tag ends.
            ErrorOr<tiny_utf8::string> tagName = Helper::readString(data, dataSize, currentPosition+1);
            if (tagName.isError) {
                return finish(Result(true, tagName.errorCode));
            }
            // used to seek to the start of the list's/compound's contents
            //
            // there is no way this is an error bc it gets
            // checked while trying to parse the string above
            //
            // The length prefix is an unsigned 16 bit value. Going
            // through uint16_t keeps names longer than 32767 bytes from
            // being sign-extended into a huge offset.
            uint16_t nameSize = static_cast<uint16_t>(Helper::readInt16(data, dataSize, currentPosition+1).value);
            // type byte + two name size bytes = 3
            uint64_t headerSize = static_cast<uint64_t>(nameSize) + 3;
            uint64_t processedTagSize = 0;
            if (data[currentPosition]==TagType::LIST) {
                ErrorOr<std::vector<Tag::Generic*>> listData = deserializeRawListContents(data, dataSize, currentPosition + headerSize, &processedTagSize);
                if (listData.isError) {
                    return finish(Result(true, listData.errorCode));
                }
                tags.push_back(new Tag::List(tagName.value, listData.value));
            } else if (data[currentPosition]==TagType::COMPOUND) {
                ErrorOr<std::vector<Tag::Generic*>> compoundData = deserialize(data, dataSize, currentPosition + headerSize, &processedTagSize);
                if (compoundData.isError) {
                    return finish(Result(true, compoundData.errorCode));
                }
                tags.push_back(new Tag::Compound(tagName.value, compoundData.value));
            } else {
                // Without this, the position would never advance and
                // the loop would spin on the same tag forever.
                return finish(Result(true, ErrorCodes::UNKNOWN));
            }
            currentPosition += processedTagSize + headerSize;
            continue;
        }
        if (currentPosition + nextTagSize.value > dataSize) {
            return finish(Result(true, ErrorCodes::OVERRUN));
        }
        // recursion abort condition
        if (data[currentPosition]==TagType::END) {
            // not appending an end tag as it is built into
            // the compound anyway
            currentPosition++;
            return finish(Result(tags));
        }
        // nameSize cannot be an error here bc it got checked in
        // totalTagSize() already
        //
        // read as unsigned for the same reason as above
        uint16_t nameSize = static_cast<uint16_t>(Helper::readInt16(data, dataSize, currentPosition+1).value);
        ErrorOr<tiny_utf8::string> name = Helper::readString(data, dataSize, currentPosition+1);
        if (name.isError) {
            return finish(Result(true, name.errorCode));
        }
        // type byte + two name size bytes + name = start of the payload
        uint64_t payloadPosition = currentPosition + static_cast<uint64_t>(nameSize) + 3;
        // Overrun / out of range errors have already been ruled out by
        // checking the tag size against the total amount of data.
        switch (data[currentPosition]) {
            case TagType::INT8: {
                int8_t content = Helper::readInt8(data, dataSize, payloadPosition).value;
                tags.push_back(new Tag::Int8(name.value, content));
                break;
            }
            case TagType::INT16: {
                int16_t content = Helper::readInt16(data, dataSize, payloadPosition).value;
                tags.push_back(new Tag::Int16(name.value, content));
                break;
            }
            case TagType::INT32: {
                int32_t content = Helper::readInt32(data, dataSize, payloadPosition).value;
                tags.push_back(new Tag::Int32(name.value, content));
                break;
            }
            case TagType::INT64: {
                int64_t content = Helper::readInt64(data, dataSize, payloadPosition).value;
                tags.push_back(new Tag::Int64(name.value, content));
                break;
            }
            case TagType::FLOAT: {
                float content = Helper::readFloat(data, dataSize, payloadPosition).value;
                tags.push_back(new Tag::Float(name.value, content));
                break;
            }
            case TagType::DOUBLE: {
                double content = Helper::readDouble(data, dataSize, payloadPosition).value;
                tags.push_back(new Tag::Double(name.value, content));
                break;
            }
            case TagType::INT8_ARRAY: {
                std::vector<int8_t> content = Helper::readInt8Array(data, dataSize, payloadPosition).value;
                tags.push_back(new Tag::Int8Array(name.value, content));
                break;
            }
            case TagType::STRING: {
                ErrorOr<tiny_utf8::string> content = Helper::readString(data, dataSize, payloadPosition);
                if (content.isError) {
                    return finish(Result(true, content.errorCode));
                }
                tags.push_back(new Tag::String(name.value, content.value));
                break;
            }
            case TagType::INT32_ARRAY: {
                std::vector<int32_t> content = Helper::readInt32Array(data, dataSize, payloadPosition).value;
                tags.push_back(new Tag::Int32Array(name.value, content));
                break;
            }
            case TagType::INT64_ARRAY: {
                std::vector<int64_t> content = Helper::readInt64Array(data, dataSize, payloadPosition).value;
                tags.push_back(new Tag::Int64Array(name.value, content));
                break;
            }
            default: {
                return finish(Result(true, ErrorCodes::UNKNOWN));
            }
        }
        currentPosition += nextTagSize.value;
    }
    // ran out of data without hitting an end tag on this level
    return finish(Result(tags));
}
bool validateRawListContents(uint8_t data[], uint64_t dataSize, uint64_t initialPosition, uint64_t* processedDataSize) {
// get contained data length by reading it manually because
// the function that does it normally can't deal with

View File

@ -301,5 +301,6 @@ namespace NBT {
};
}
// Parses the serialized NBT data in data[0..dataSize) starting at
// initialPosition and returns the tags found as a vector of heap-allocated
// tags, or an error code. If processedDataSize is not nullptr, it receives
// the number of bytes that were processed, counted from initialPosition.
ErrorOr<std::vector<Tag::Generic*>> deserialize(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr);
// NOTE(review): the definition is not visible from here; presumably this
// reports whether the data starting at initialPosition is valid NBT and
// uses processedDataSize the same way as deserialize() - confirm.
bool validateRawNBTData(uint8_t data[], uint64_t dataSize, uint64_t initialPosition=0, uint64_t* processedDataSize=nullptr);
}