What is a File?
Typically we think of files as a container for data. Largely due to the naming format Windows uses to describe the things a user interacts with, a user typically associates a “file” with data written to the hard drive.
While by extension this may be true, a more abstract definition is more accurate. A “file” is an object (a container) that Windows can manipulate. In C++, the object a program uses to manipulate such a container is called a file stream. For simplicity, we can use the fstream library to manipulate text-based files.
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
To verify a file exists, we can try to open and check for success:
// Reports whether the named file can be opened for reading.
//
// fileName: NUL-terminated path. A NULL pointer is reported as "does not exist"
//           instead of being handed to the stream constructor.
// Returns:  true when an input stream opened on the path is in a good state.
//           A file that exists but cannot be opened for reading also yields false.
bool fileExists(const char *fileName) {
    if (!fileName) {
        return false;
    }
    std::ifstream inFile(fileName);  // instantiate the file stream object and open it
    const bool retVal = inFile.good();
    inFile.close();
    return retVal;
}
Similarly we can create a file just by opening it:
// Creates the named file by opening it for output and closing it again.
//
// fileName: NUL-terminated path. A NULL pointer is ignored.
//
// Opening with std::fstream::out alone creates a missing file and truncates an
// existing one, so any previous contents of the file are discarded.
// The function has no return value; use a subsequent open to confirm success.
void createFile(const char *fileName) {
    if (!fileName) {
        return;
    }
    std::fstream targetFileCreate;  // instantiate the file stream object
    targetFileCreate.open(fileName, std::fstream::out);
    targetFileCreate << std::flush;
    targetFileCreate.close();
}
With a solid understanding of pointers and character arrays (see Fundamentals), reading a character file is easy:
NOTE: this function takes the address of a NULL pointer as an argument and returns, through that pointer, a dynamically allocated array that the caller must release with delete[] to avoid a memory leak. Alternatively, use readCSVDimensions() (presented below) to predetermine the CSV file's dimensions and rewrite the function to pre-allocate memory.
// Reads a whole file into a newly allocated, NUL-terminated character array.
//
// charBuffer:   address of a pointer that must be NULL on entry. On success it
//               receives an array allocated with new[]; the caller must release
//               it with delete[].
// fileName:     NUL-terminated path of the file to read.
// bufferLength: receives the allocated size (file size + 1 for the terminator)
//               on success and 0 on any failure.
//
// Returns  0 on success,
//         -1 when an argument is invalid (NULL argument, or *charBuffer not NULL),
//         -2 when the file cannot be opened or its size cannot be determined.
//
// The file is opened in text mode, so on platforms that translate line endings
// fewer characters than the reported size may be read; the remainder of the
// array stays zero-filled.
int readFile(char** charBuffer, const char* fileName, int* bufferLength) {
    if (bufferLength != NULL) {
        *bufferLength = 0;
    }
    if (charBuffer == NULL || fileName == NULL || bufferLength == NULL || *charBuffer != NULL) {
        std::cout << "Error in alx::fileIO::readFile()\n" << std::endl;
        return -1;
    }

    std::ifstream inFile(fileName, std::fstream::in);  // instantiate the stream and open the file
    if (!inFile.is_open()) {
        return -2;  // file could not be opened
    }

    inFile.seekg(0, std::ios::end);                 // move to the end of the file
    const std::streamoff fileSize = inFile.tellg(); // cursor position == size in bytes
    inFile.seekg(0, std::ios::beg);                 // back to the beginning

    // tellg() reports -1 on failure, and the size (plus terminator) must fit in an int.
    if (fileSize < 0 || fileSize >= INT_MAX) {
        return -2;
    }

    const int length = static_cast<int>(fileSize) + 1;
    char* contents = new char[length];
    std::memset(contents, '\0', length);   // guarantees termination even on a short read
    inFile.read(contents, length - 1);     // read the file, leaving room for the terminator
    inFile.close();

    *charBuffer = contents;
    *bufferLength = length;
    return 0;
}
int main() { char* charBuffer = NULL; char fileName[70] = "file.txt"; int bufferLength = 0; //readFile allocates memory internally int retVal = readFile(&charBuffer, fileName, &bufferLength); std::cout << "file contents: " << charBuffer << std::endl; //delete memory delete[] charBuffer; }
To write simple character files (for lengthier files see writeCSV2() as an example):
// Writes a NUL-terminated string to a file.
//
// fileName:  NUL-terminated path of the file to write.
// content:   NUL-terminated text to write.
// writeMode: 1 appends to the file (creating it if missing); any other value
//            overwrites the file (creating it if missing).
//
// Returns  0 on success,
//         -1 when fileName or content is NULL,
//         -2 when the file cannot be opened,
//         -3 when writing or closing the file fails.
int writeToFile(const char* fileName, const char* content, int writeMode) {
    if (fileName == NULL || content == NULL) {
        return -1;
    }

    std::ofstream outFile;  // instantiate the file stream object
    switch (writeMode) {
    case 1:  // create or append
        outFile.open(fileName, std::ofstream::out | std::ofstream::app);
        break;
    default:  // overwrite or create
        outFile.open(fileName, std::ofstream::out | std::ofstream::trunc);
        break;
    }
    if (!outFile.is_open()) {
        return -2;
    }

    outFile << content;
    outFile.close();
    return outFile.fail() ? -3 : 0;
}
// Example: overwrite (or create) "file.txt" with a single line of text.
int main() {
    const int overwriteOrCreate = 0;  // writeToFile mode 0: open or create
    char targetName[70] = "file.txt";
    char lineOfText[30] = "contents to write to file...\n";

    writeToFile(targetName, lineOfText, overwriteOrCreate);
}
Reading CSV Files
To read comma separated value files (.CSV) we need to convert to and from specific data types and format/parse the lines of text relative to rows and columns of the data set.
int readCSVDimensions(char* fileChars, int* rows, int* columns) { //Note: assumes a rectangular table of values with specific EOL and seperation characters *rows = 0; *columns = 1; //determine dinensions char* ptrLineEnd = strchr(fileChars, '\n'); char* ptrValSep = strchr(fileChars, ','); while (ptrLineEnd > ptrValSep) { (*columns)++; ptrValSep = strchr(ptrValSep + 1, ','); } while (ptrLineEnd != NULL) { (*rows)++; ptrLineEnd = strchr(ptrLineEnd + 1, '\n'); } std::cout << " dimensions: " << *rows << " x " << *columns << std::endl; return 0; } int readCSV2D(char* fileName, double*** data, int* rows, int* columns){ //returns a dynamically allocated array** you must delete it externally //WARNING: uses strtod which can return a zero value on failure!!! int retVal = 0; if (*data == NULL) { *rows = 0; *columns = 0; char* charBuffer = NULL; int charBufferLength = 0; int charRetVal = readFile(&charBuffer, fileName, &charBufferLength); if (charRetVal == 0) { //std::cout <<"file contents" << charBuffer << std::endl; if (charBufferLength > 0) { readCSVDimensions(charBuffer, rows, columns); //allocate memory (*data) = new double* [*rows]; for (int r = 0; r < *rows; r++) (*data)[r] = new double[*columns]; //step through seperations characters to parse data char* ptrValStart = charBuffer; //pointer to a character in Buffer, init to first character char* ptrValEnd; //char* ptrLineEnd; bool endOfFile = false; for (int r = 0; r < *rows; r++) { //for each row for (int c = 0; c < *columns; c++) { //for each column //read next value and get new end ptr; if error val is zero padded!!!! 
(*data)[r][c] = strtod(ptrValStart, &ptrValEnd); ptrValStart = ptrValEnd + 1; //reset start ptr } } delete[] charBuffer; } else { retVal = -3; //file is empty std::cout << "ERROR file is empty" << std::endl; } } else{ retVal = -2; //could not read file std::cout << "ERROR reading file" << std::endl; } } else { std::cout << "ERROR passing NULL value" << std::endl; retVal = -1; *rows = 0; *columns = 0; } return retVal; }
// Example: load "C:/data.csv", print every value on its own line, then free the table.
int main() {
    char csvPath[12] = "C:/data.csv";
    double** table = NULL;  // readCSV2D allocates the table
    int rowCount = 0;
    int columnCount = 0;

    readCSV2D(csvPath, &table, &rowCount, &columnCount);

    // Print the values in row-major order.
    for (int r = 0; r != rowCount; ++r) {
        const double* rowValues = table[r];
        for (int c = 0; c != columnCount; ++c) {
            std::cout << rowValues[c] << std::endl;
        }
    }

    // Release each row, then the array of row pointers.
    for (int r = 0; r != rowCount; ++r) {
        delete[] table[r];
    }
    delete[] table;
    table = NULL;
}
Writing CSV Files
Writing simple CSV files is straightforward when allowing fstream to automatically determine the frequency of system calls (when to actually write the data to disk):
// Writes a 2D array of doubles to a CSV file, letting the stream decide when to
// flush to disk.
//
// fileName:  NUL-terminated path of the file to write.
// data2D:    array of `rows` pointers, each to `columns` doubles.
// rows:      number of rows to write.
// columns:   number of values per row.
// writeMode: 1 appends to the file (creating it if missing); any other value
//            overwrites the file (creating it if missing).
//
// Values are written with 9 significant digits, separated by ", ", one row per line.
// When rows or columns is not positive the file is still opened (and truncated in
// overwrite mode) but nothing is written.
//
// Returns  0 on success,
//         -1 when fileName is NULL, or data2D is NULL while there is data to write,
//         -2 when the file cannot be opened,
//         -3 when writing or closing the file fails.
int writeCSV(const char* fileName, double** data2D, int rows, int columns, int writeMode) {
    const bool hasCells = (rows > 0) && (columns > 0);
    if (fileName == NULL || (hasCells && data2D == NULL)) {
        return -1;
    }

    std::ofstream outFile;  // instantiate the file stream object
    outFile.precision(9);   // precision for floating point values in the stream
    switch (writeMode) {
    case 1:  // create or append
        outFile.open(fileName, std::ofstream::out | std::ofstream::app);
        break;
    default:  // overwrite or create
        outFile.open(fileName, std::ofstream::out | std::ofstream::trunc);
        break;
    }
    if (!outFile.is_open()) {
        return -2;
    }

    if (hasCells) {
        for (int h = 0; h < rows; h++) {
            for (int w = 0; w < columns - 1; w++) {
                outFile << data2D[h][w] << ", ";
            }
            // The last value of a row is followed by the line end, not a separator.
            outFile << data2D[h][columns - 1] << "\n";
        }
    }

    outFile.close();  // flush and close
    return outFile.fail() ? -3 : 0;
}
When writing large files (millions of data elements), it is advantageous to pre-parse the data and force control of the fstream object to minimize system calls. This significantly reduces write time (For example: when using VS2019 targeting Windows 10, write times of 15 minutes can be reduced to less than 1 minute).
// Writes the first *bytesInBuffer bytes of buffer to outFile and resets the count.
static void pushBufferToFile(std::ofstream& outFile, const char* buffer, int* bytesInBuffer) {
    std::cout << "pushing to file..." << std::endl;
    // write() takes an explicit length, so the buffer needs no NUL terminator.
    outFile.write(buffer, *bytesInBuffer);
    *bytesInBuffer = 0;
}

// Writes a 2D array of doubles to a CSV file, formatting the values into a large
// in-memory buffer so that the stream is handed a few big writes instead of many
// small ones.
//
// fileName:  NUL-terminated path of the file to write.
// data2D:    array of `rows` pointers, each to `columns` doubles.
// rows:      number of rows to write.
// columns:   number of values per row.
// writeMode: 1 appends to the file (creating it if missing); any other value
//            overwrites the file (creating it if missing).
//
// Values are written in fixed notation with 4 decimals ("%.4f"), separated by ", ",
// one row per line. A progress message is printed to std::cout on every push.
// When rows or columns is not positive the file is still opened (and truncated in
// overwrite mode) but nothing is written.
//
// Returns  0 on success,
//         -1 when fileName is NULL, or data2D is NULL while there is data to write,
//         -2 when the file cannot be opened,
//         -3 when formatting a value, writing, or closing the file fails.
int writeCSV2(const char* fileName, double** data2D, int rows, int columns, int writeMode) {
    const bool hasCells = (rows > 0) && (columns > 0);
    if (fileName == NULL || (hasCells && data2D == NULL)) {
        return -1;
    }

    std::ofstream outFile;  // instantiate the file stream object
    switch (writeMode) {
    case 1:  // create or append
        outFile.open(fileName, std::ofstream::out | std::ofstream::app);
        break;
    default:  // overwrite or create
        outFile.open(fileName, std::ofstream::out | std::ofstream::trunc);
        break;
    }
    if (!outFile.is_open()) {
        return -2;
    }

    int retVal = 0;
    if (hasCells) {
        const int bufferSize = 1650000;
        // Upper bound for one formatted value: the largest double printed with
        // "%.4f" needs a little over 300 characters, plus separator and terminator.
        const int maxFieldLength = 512;

        char* charBuffer = new char[bufferSize];  // preallocated character buffer
        int bytesInBuffer = 0;

        for (int h = 0; h < rows && retVal == 0; h++) {
            for (int w = 0; w < columns; w++) {
                // Push before formatting whenever the next value might not fit.
                if (bufferSize - bytesInBuffer < maxFieldLength) {
                    pushBufferToFile(outFile, charBuffer, &bytesInBuffer);
                }

                char* destination = charBuffer + bytesInBuffer;
                int written = 0;
                if (w < columns - 1) {
                    written = std::snprintf(destination, maxFieldLength, "%.4f, ", data2D[h][w]);
                } else {
                    written = std::snprintf(destination, maxFieldLength, "%.4f\n", data2D[h][w]);
                }
                if (written < 0 || written >= maxFieldLength) {
                    retVal = -3;  // formatting failed or the value was truncated
                    break;
                }
                bytesInBuffer += written;
            }
        }

        // Push whatever is left, including the complete values before a failure.
        pushBufferToFile(outFile, charBuffer, &bytesInBuffer);
        delete[] charBuffer;
    }

    outFile.close();  // flush and close
    if (retVal == 0 && outFile.fail()) {
        retVal = -3;
    }
    return retVal;
}