I am investigating an issue where the Windows C++ API `BackupRead`, used in our software, appears to get stuck for a few files. I suspected large file streams to be the reason.
I wrote a small tool that simulates our usage: it calls `BackupRead` in a loop to read all the streams of a file and prints information about each one. The tool also gets stuck in a `BackupRead` call for Microsoft Excel files (.xls and .xlsx).
I would like to know whether there is anything special about these Excel files that causes this behaviour.
Tool output (stuck after reading BACKUP_DATA):
Getting streams for file: file.xls
----------------------------
Header BackupRead returned: 1, BytesRead: 20
StreamId: BACKUP_SECURITY_DATA, Size: 260, NameSize: 0
TotalStreamSize: 260, RemainingBufferSize: 1048556, OffsetInBuffer: 20
Data BackupRead returned: 1, BytesRead: 260, ToRead: 260
Completed reading stream: BACKUP_SECURITY_DATA and resetting buffer for next stream
----------------------------
Header BackupRead returned: 1, BytesRead: 20
StreamId: BACKUP_DATA, Size: 43520, NameSize: 0
TotalStreamSize: 43520, RemainingBufferSize: 1048556, OffsetInBuffer: 20
Data BackupRead returned: 1, BytesRead: 43520, ToRead: 43520
Completed reading stream: BACKUP_DATA and resetting buffer for next stream
Expected tool output:
Getting streams for file: file.pdf
----------------------------
Header BackupRead returned: 1, BytesRead: 20
StreamId: BACKUP_SECURITY_DATA, Size: 200, NameSize: 0
TotalStreamSize: 200, RemainingBufferSize: 1048556, OffsetInBuffer: 20
Data BackupRead returned: 1, BytesRead: 200, ToRead: 200
Completed reading stream: BACKUP_SECURITY_DATA and resetting buffer for next stream
----------------------------
Header BackupRead returned: 1, BytesRead: 20
StreamId: BACKUP_DATA, Size: 229272, NameSize: 0
TotalStreamSize: 229272, RemainingBufferSize: 1048556, OffsetInBuffer: 20
Data BackupRead returned: 1, BytesRead: 229272, ToRead: 229272
Completed reading stream: BACKUP_DATA and resetting buffer for next stream
----------------------------
Header BackupRead returned: 1, BytesRead: 20
StreamId: BACKUP_OBJECT_ID, Size: 64, NameSize: 0
TotalStreamSize: 64, RemainingBufferSize: 1048556, OffsetInBuffer: 20
Data BackupRead returned: 1, BytesRead: 64, ToRead: 64
Completed reading stream: BACKUP_OBJECT_ID and resetting buffer for next stream
----------------------------
Header BackupRead returned: 1, BytesRead: 0
No more streams to read
Tool code:
int main()
{
wstring wstrFile = "<FilePathHere>";
HANDLE hFile = CreateFileW(
wstrFile.c_str(),
GENERIC_READ | ACCESS_SYSTEM_SECURITY,
FILE_SHARE_READ,
nullptr,
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_SEQUENTIAL_SCAN,
nullptr);
if (hFile == INVALID_HANDLE_VALUE)
{
std::wcerr << L"CreateFileW failed: " << GetLastError() << std::endl;
return 1;
}
char* szAllocatedBuffer = new char[1024 * 1024]; // 1 MB buffer
const DWORD dwWin32MinimumStreamReadSize = sizeof(DWORD) * 3 + sizeof(LARGE_INTEGER);
unsigned long ulOffsetInBuffer = 0;
WIN32_STREAM_ID sWin32StreamBuffer = { 0 };
LONGLONG llRemainingBufferSize = 1024 * 1024; // 1 MB buffer
unsigned long ulBytesRead = 0;
DWORD dwStreamId = 0;
LPVOID pContext = nullptr;
BOOL bResult = FALSE;
// Loop to read all the streams with following logic:
// 1. Read the header (which is WIN32_STREAM_ID structure)
// 2. Get the stream id and stream size (QuadPart + dwStreamNameSize)
// 3. Read the complete stream into the allocated buffer (I dont care about the content so it is okay to overwrite the buffer)
// 4. Repeat until no more streams
while (true)
{
// Step 1: Read the header
bResult = BackupRead(
hFile,
(unsigned char *)(&sWin32StreamBuffer),
dwWin32MinimumStreamReadSize,
&ulBytesRead,
FALSE,
TRUE,
&pContext);
std::cout << "----------------------------" << std::endl;
std::cout << "Header BackupRead returned: " << bResult << ", BytesRead: " << ulBytesRead << std::endl;
if (bResult && ulBytesRead == 0)
{
std::cout << "No more streams to read" << std::endl;
break;
}
if (!bResult || ulBytesRead == 0)
{
std::wcerr << L"BackupRead failed or no more data: " << GetLastError() << std::endl;
break;
}
if (ulBytesRead < dwWin32MinimumStreamReadSize)
{
std::wcerr << L"BackupRead read less than minimum required size: " << ulBytesRead << std::endl;
break;
}
llRemainingBufferSize -= ulBytesRead;
ulOffsetInBuffer += ulBytesRead;
// Step 2: Get the stream id and stream size
LPWIN32_STREAM_ID pStreamHeader = (LPWIN32_STREAM_ID)(&sWin32StreamBuffer);
dwStreamId = pStreamHeader->dwStreamId;
LONGLONG llStreamSize = pStreamHeader->Size.QuadPart;
LONGLONG llStreamNameSize = pStreamHeader->dwStreamNameSize;
std::cout << "StreamId: " << StreamTypeName(dwStreamId)
<< ", Size: " << llStreamSize
<< ", NameSize: " << llStreamNameSize
<< std::endl;
LONGLONG llTotalStreamSize = llStreamSize + llStreamNameSize;
if (llTotalStreamSize > llRemainingBufferSize)
{
// Okay to re-use the buffer as I dont care about the content. Just reset the buffer
std::cout << "Resetting buffer as remaining buffer size is insufficient" << std::endl;
ulOffsetInBuffer = 0;
llRemainingBufferSize = 1024 * 1024; // 1 MB buffer
}
// Step 3: Read the complete stream into the allocated buffer
// If stream size is greater than buffer size, loop until complete stream is read
while (llTotalStreamSize > 0)
{
std::cout << "TotalStreamSize: " << llTotalStreamSize
<< ", RemainingBufferSize: " << llRemainingBufferSize
<< ", OffsetInBuffer: " << ulOffsetInBuffer
<< std::endl;
DWORD dwToRead = (DWORD)((llTotalStreamSize > llRemainingBufferSize) ? llRemainingBufferSize : llTotalStreamSize);
bResult = BackupRead(
hFile,
(unsigned char *)(szAllocatedBuffer + ulOffsetInBuffer),
dwToRead,
&ulBytesRead,
FALSE,
TRUE,
&pContext);
std::cout << "Data BackupRead returned: " << bResult << ", BytesRead: " << ulBytesRead << ", ToRead: " << dwToRead << std::endl;
if (!bResult)
{
std::wcerr << L"BackupRead failed while reading stream data: " << GetLastError() << std::endl;
break;
}
if (ulBytesRead == 0)
{
std::wcerr << L"BackupRead read zero bytes while reading stream data" << std::endl;
break;
}
llTotalStreamSize -= ulBytesRead;
llRemainingBufferSize -= ulBytesRead;
ulOffsetInBuffer += ulBytesRead;
if (llTotalStreamSize > 0 && llRemainingBufferSize == 0)
{
// Okay to re-use the buffer as I dont care about the content. Just reset the buffer
std::cout << "Resetting buffer as remaining buffer size is zero" << std::endl;
ulOffsetInBuffer = 0;
llRemainingBufferSize = 1024 * 1024; // 1 MB buffer
}
}
if (llTotalStreamSize > 0)
{
std::wcerr << L"Failed to read complete stream data" << std::endl;
break;
}
std::cout << "Completed reading stream: " << StreamTypeName(dwStreamId) << " and resetting buffer for next stream" << std::endl;
llRemainingBufferSize = 1024 * 1024; // 1 MB buffer
ulOffsetInBuffer = 0;
}
// Step 4: Cleanup: Make the final BackupRead call with bAbort = TRUE, free the context and close the file handle
bResult = BackupRead(
hFile,
nullptr,
0,
nullptr,
TRUE,
FALSE,
&pContext);
CloseHandle(hFile);
return 0;
}
A note on `GetLastError`: you are supposed to call `GetLastError` immediately after the Windows API function that fails. Instead, you are doing things like this: `std::cout << "Data BackupRead returned: " << bResult << ", BytesRead: " << ulBytesRead << ", ToRead: " << dwToRead << std::endl;` before calling `GetLastError`. What if that `cout` and the iostream operations using `<<` reset the last-error value, so that the subsequent call to `GetLastError` returns an incorrect value?