// Maps the signed size type name used by the index arithmetic in this file onto SSIZE_T.
// NOTE(review): SSIZE_T is not declared in the visible lines - presumably a platform header provides it; confirm.
34 typedef SSIZE_T ssize_t;
// Tail of a parameter list whose opening line is not visible here.
// Defaults: the floating-point fallback is a signaling NaN, the integer fallback is 0.
60 const long double pDefaultFloat = std::numeric_limits<long double>::signaling_NaN(),
61 const long long pDefaultInteger = 0)
// Returns the fixed, human-readable reason carried by this exception type.
// Declared const with an empty throw() specification, i.e. it promises not to throw.
95 virtual const char*
what()
const throw()
97 return "unsupported conversion datatype";
// Member initializer: binds mConverterParams to the pConverterParams constructor argument.
// (The constructor's own signature is not visible here.)
116 : mConverterParams(pConverterParams)
// Converts a value to its string form.
// pVal: value to convert; pStr: output string.
// The visible condition accepts T only when it is one of the built-in integer types,
// one of the built-in floating-point types, or char; the comparison is done at run time via typeid.
// NOTE(review): the statements that fill pStr, and whatever happens for other types,
// are on lines not visible here.
125 void ToStr(
const T& pVal, std::string& pStr)
const
127 if (
typeid(T) ==
typeid(
int) ||
128 typeid(T) ==
typeid(
long) ||
129 typeid(T) ==
typeid(
long long) ||
130 typeid(T) ==
typeid(
unsigned) ||
131 typeid(T) ==
typeid(
unsigned long) ||
132 typeid(T) ==
typeid(
unsigned long long) ||
133 typeid(T) ==
typeid(
float) ||
134 typeid(T) ==
typeid(
double) ||
135 typeid(T) ==
typeid(
long double) ||
136 typeid(T) ==
typeid(
char))
// Stream used for formatting in the supported-type branch.
138 std::ostringstream out;
// Converts a string to a value of type T.
// pStr: text to parse; pVal: output value.
// Dispatch is by run-time typeid comparison; each branch calls the std::sto* parser for
// that type and casts the result back to T.
// NOTE(review): error handling and the fallback for unsupported types are on lines not visible here.
153 void ToVal(
const std::string& pStr, T& pVal)
const
// --- signed integer types ---
157 if (
typeid(T) ==
typeid(
int))
159 pVal =
static_cast<T
>(std::stoi(pStr));
162 else if (
typeid(T) ==
typeid(long))
164 pVal =
static_cast<T
>(std::stol(pStr));
167 else if (
typeid(T) ==
typeid(
long long))
169 pVal =
static_cast<T
>(std::stoll(pStr));
// --- unsigned integer types ---
// The standard library has no parser returning plain unsigned, so std::stoul is used for it too.
172 else if (
typeid(T) ==
typeid(unsigned))
174 pVal =
static_cast<T
>(std::stoul(pStr));
177 else if (
typeid(T) ==
typeid(
unsigned long))
179 pVal =
static_cast<T
>(std::stoul(pStr));
182 else if (
typeid(T) ==
typeid(
unsigned long long))
184 pVal =
static_cast<T
>(std::stoull(pStr));
// --- floating-point types ---
// NOTE(review): this chain starts with a fresh 'if' and no new signature is visible before it,
// so it presumably continues the same ToVal - confirm.
203 if (
typeid(T) ==
typeid(float))
205 pVal =
static_cast<T
>(std::stof(pStr));
208 else if (
typeid(T) ==
typeid(double))
210 pVal =
static_cast<T
>(std::stod(pStr));
213 else if (
typeid(T) ==
typeid(
long double))
215 pVal =
static_cast<T
>(std::stold(pStr));
// --- char ---
// Takes only the first character of pStr; the rest of the string is ignored.
232 if (
typeid(T) ==
typeid(char))
234 pVal =
static_cast<T
>(pStr[0]);
// Type of a caller-supplied conversion callback: it receives the cell text (pStr)
// and a reference (pVal) through which it hands back the converted value.
270 using ConvFunc = std::function<void (
const std::string & pStr, T & pVal)>;
// Constructs the label settings.
// pColumnNameIdx: defaults to 0; pRowNameIdx: defaults to -1.
// NOTE(review): elsewhere in this file both indices are used as "idx + 1" offsets and checked
// with "< 0" error messages, so a negative value presumably means "no labels" - confirm.
287 explicit LabelParams(
const int pColumnNameIdx = 0,
const int pRowNameIdx = -1)
// Tail of a parameter list whose opening line is not visible here.
// Defaults: pHasCR follows sPlatformHasCR, pQuotedLinebreaks is off, pAutoQuote is on.
322 const bool pHasCR =
sPlatformHasCR,
const bool pQuotedLinebreaks =
false,
323 const bool pAutoQuote =
true)
// Tail of a parameter list whose opening line is not visible here.
// Defaults: the comment prefix character is '#', skipping of empty lines is off.
373 const char pCommentPrefix =
'#',
374 const bool pSkipEmptyLines =
false)
// Constructs a Document. pPath defaults to an empty string.
// The visible initializers store the four parameter objects in the matching members.
// NOTE(review): the remaining parameters and the constructor body are not visible here.
413 explicit Document(
const std::string& pPath = std::string(),
419 , mLabelParams(pLabelParams)
420 , mSeparatorParams(pSeparatorParams)
421 , mConverterParams(pConverterParams)
422 , mLineReaderParams(pLineReaderParams)
// Initializer list of a second constructor (its signature is not visible here):
// stores the same four parameter objects in the matching members.
445 , mLabelParams(pLabelParams)
446 , mSeparatorParams(pSeparatorParams)
447 , mConverterParams(pConverterParams)
448 , mLineReaderParams(pLineReaderParams)
// Load overload taking a file path.
// The visible statements overwrite the four stored parameter objects with the ones passed in.
// NOTE(review): the remaining parameters and the actual read step are not visible here.
463 void Load(
const std::string& pPath,
470 mLabelParams = pLabelParams;
471 mSeparatorParams = pSeparatorParams;
472 mConverterParams = pConverterParams;
473 mLineReaderParams = pLineReaderParams;
// Load overload taking an input stream.
// The visible statements overwrite the four stored parameter objects with the ones passed in.
// NOTE(review): the remaining parameters and the actual read step are not visible here.
486 void Load(std::istream& pStream,
493 mLabelParams = pLabelParams;
494 mSeparatorParams = pSeparatorParams;
495 mConverterParams = pConverterParams;
496 mLineReaderParams = pLineReaderParams;
// Save overload taking a file path; pPath defaults to an empty string.
// NOTE(review): how an empty path is handled is on lines not visible here.
506 void Save(
const std::string& pPath = std::string())
// Save overload taking an output stream (body not visible here).
519 void Save(std::ostream& pStream)
// Empties the column-name lookup map (the enclosing function is not visible here).
531 mColumnNames.clear();
// Column-name lookup: when the name is known, return its stored position minus the
// row-label offset (mRowNameIdx + 1), so the result is relative to the first non-label column.
// NOTE(review): the result for an unknown name is on a line not visible here.
548 if (mColumnNames.find(pColumnName) != mColumnNames.end())
550 return mColumnNames.at(pColumnName) - (mLabelParams.
mRowNameIdx + 1);
// Fragment that collects one column as std::vector<T> (function signature not visible here).
// Shift the caller's index past the row-label column; the offset is 0 when mRowNameIdx is -1.
564 const ssize_t columnIdx = pColumnIdx + (mLabelParams.
mRowNameIdx + 1);
565 std::vector<T> column;
// Walk the rows of mData. NOTE(review): any per-row filtering sits on lines not visible here.
567 for (
auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
// Only read the cell when this row is wide enough.
571 if (columnIdx <
static_cast<ssize_t
>(itRow->size()))
574 converter.
ToVal(itRow->at(columnIdx), val);
575 column.push_back(val);
// Error path: the message reports the index and row width with the label offset removed again.
579 const std::string errStr =
"requested column index " +
580 std::to_string(columnIdx - (mLabelParams.
mRowNameIdx + 1)) +
" >= " +
581 std::to_string(itRow->size() - (mLabelParams.
mRowNameIdx + 1)) +
582 " (number of columns on row index " +
585 throw std::out_of_range(errStr);
// Fragment that collects one column using the caller-supplied converter pToVal
// (function signature not visible here).
// Shift the caller's index past the row-label column.
601 const ssize_t columnIdx = pColumnIdx + (mLabelParams.
mRowNameIdx + 1);
602 std::vector<T> column;
603 for (
auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
// at() throws std::out_of_range if the row is narrower than columnIdx.
608 pToVal(itRow->at(columnIdx), val);
609 column.push_back(val);
// Returns the column identified by name, converted to T.
// Throws std::out_of_range with "column not found: <name>".
// NOTE(review): the lookup producing columnIdx and the condition guarding the throw are not visible here.
621 std::vector<T>
GetColumn(
const std::string& pColumnName)
const
626 throw std::out_of_range(
"column not found: " + pColumnName);
// Delegate to the index-based overload.
628 return GetColumn<T>(columnIdx);
// Fragment of a by-name column getter that takes a custom converter (signature not visible here):
// throws std::out_of_range with "column not found: <name>", otherwise delegates to the
// index-based overload with pToVal.
643 throw std::out_of_range(
"column not found: " + pColumnName);
645 return GetColumn<T>(columnIdx, pToVal);
// Writes pColumn into the column at pColumnIdx, growing the table as needed.
// pColumnIdx: column index relative to the first non-label column; pColumn: values to store.
654 void SetColumn(
const size_t pColumnIdx,
const std::vector<T>& pColumn)
// Shift the caller's index past the row-label column.
656 const size_t columnIdx = pColumnIdx + (mLabelParams.
mRowNameIdx + 1);
// Append empty rows until there is room for the label rows plus every value in pColumn.
658 while (pColumn.size() + (mLabelParams.
mColumnNameIdx + 1) > GetDataRowCount())
660 std::vector<std::string> row;
661 row.resize(GetDataColumnCount());
662 mData.push_back(row);
// Widen every row when the target column lies beyond the current width.
665 if ((columnIdx + 1) > GetDataColumnCount())
667 for (
auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
// NOTE(review): columnIdx already contains the (mRowNameIdx + 1) offset, so it is added a
// second time here - confirm whether the extra width is intended.
669 itRow->resize(columnIdx + 1 + (mLabelParams.
mRowNameIdx + 1));
// Convert each value to text. NOTE(review): the store into mData is on lines not visible here.
674 for (
auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow)
677 converter.
ToStr(*itRow, str);
// Writes pColumn into the column identified by name.
// Throws std::out_of_range with "column not found: <name>".
// NOTE(review): the lookup producing columnIdx and the condition guarding the throw are not visible here.
688 void SetColumn(
const std::string& pColumnName,
const std::vector<T>& pColumn)
693 throw std::out_of_range(
"column not found: " + pColumnName);
// Delegate to the index-based overload.
695 SetColumn<T>(columnIdx, pColumn);
// Fragment that removes one column (function signature not visible here).
// Shift the caller's index past the row-label column, then erase that cell from every row.
// NOTE(review): no bounds check is visible; erase() with an iterator past the row's end is
// undefined behavior - confirm callers guarantee a valid index.
704 const ssize_t columnIdx = pColumnIdx + (mLabelParams.
mRowNameIdx + 1);
705 for (
auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
707 itRow->erase(itRow->begin() + columnIdx);
// Error path of a by-name column operation (enclosing function not visible here):
// reports the name that could not be resolved.
720 throw std::out_of_range(
"column not found: " + pColumnName);
// Inserts a new column before position pColumnIdx.
// pColumnIdx: position relative to the first non-label column.
// pColumn: values for the new column (defaults to empty).
// pColumnName: optional label (defaults to empty).
733 void InsertColumn(
const size_t pColumnIdx,
const std::vector<T>& pColumn = std::vector<T>(),
734 const std::string& pColumnName = std::string())
// Shift the caller's index past the row-label column.
736 const size_t columnIdx = pColumnIdx + (mLabelParams.
mRowNameIdx + 1);
// Text form of the new column, one entry per table row.
738 std::vector<std::string> column;
// Two alternative sizings follow: match the current row count, or fit the label rows plus
// every supplied value. NOTE(review): the condition choosing between them is not visible here.
741 column.resize(GetDataRowCount());
745 column.resize(pColumn.size() + (mLabelParams.
mColumnNameIdx + 1));
// Convert each supplied value to text and place it at its row position.
747 for (
auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow)
750 converter.
ToStr(*itRow, str);
752 column.at(rowIdx) = str;
// Append rows until the table is as tall as the new column.
756 while (column.size() > GetDataRowCount())
758 std::vector<std::string> row;
// New rows are at least wide enough to cover mColumnNameIdx + 1 cells.
759 const size_t columnCount = std::max(
static_cast<size_t>(mLabelParams.
mColumnNameIdx + 1),
760 GetDataColumnCount());
761 row.resize(columnCount);
762 mData.push_back(row);
// Splice the new cell into every row at the target position.
765 for (
auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
768 itRow->insert(itRow->begin() + columnIdx, column.at(rowIdx));
// A label is only applied when one was supplied (the statement doing so is not visible here).
771 if (!pColumnName.empty())
// Fragment computing a column count (function signature not visible here):
// width of the first row (0 when there are no rows) minus a term on a line not visible here.
783 const ssize_t count =
static_cast<ssize_t
>((mData.size() > 0) ? mData.at(0).size() : 0) -
// Never report a negative count.
785 return (count >= 0) ? count : 0;
// Row-name lookup: when the name is known, return its stored position minus the
// column-label offset (mColumnNameIdx + 1), so the result is relative to the first non-label row.
// NOTE(review): the result for an unknown name is on a line not visible here.
797 if (mRowNames.find(pRowName) != mRowNames.end())
799 return mRowNames.at(pRowName) - (mLabelParams.
mColumnNameIdx + 1);
// Returns the row at pRowIdx, converted to T.
// pRowIdx: row index relative to the first non-label row.
811 std::vector<T>
GetRow(
const size_t pRowIdx)
const
// Shift the caller's index past the column-label row.
813 const ssize_t rowIdx = pRowIdx + (mLabelParams.
mColumnNameIdx + 1);
// mData.at() throws std::out_of_range for a row index beyond the table.
// NOTE(review): any skipping of the row-label cell is on lines not visible here.
816 for (
auto itCol = mData.at(rowIdx).begin(); itCol != mData.at(rowIdx).end(); ++itCol)
821 converter.
ToVal(*itCol, val);
// Fragment of another row getter (signature and loop body not visible here).
// Shift the caller's index past the column-label row, then iterate the cells of that row;
// mData.at() throws std::out_of_range for a row index beyond the table.
837 const ssize_t rowIdx = pRowIdx + (mLabelParams.
mColumnNameIdx + 1);
840 for (
auto itCol = mData.at(rowIdx).begin(); itCol != mData.at(rowIdx).end(); ++itCol)
// Returns the row identified by name, converted to T.
// Throws std::out_of_range with "row not found: <name>".
// NOTE(review): the lookup producing rowIdx and the condition guarding the throw are not visible here.
858 std::vector<T>
GetRow(
const std::string& pRowName)
const
863 throw std::out_of_range(
"row not found: " + pRowName);
// Delegate to the index-based overload.
865 return GetRow<T>(rowIdx);
// Fragment of a by-name row getter that takes a custom converter (signature not visible here):
// throws std::out_of_range with "row not found: <name>", otherwise delegates to the
// index-based overload with pToVal.
880 throw std::out_of_range(
"row not found: " + pRowName);
882 return GetRow<T>(rowIdx, pToVal);
// Writes pRow into the row at pRowIdx, growing the table as needed.
// pRowIdx: row index relative to the first non-label row; pRow: values to store.
891 void SetRow(
const size_t pRowIdx,
const std::vector<T>& pRow)
// Shift the caller's index past the column-label row.
893 const size_t rowIdx = pRowIdx + (mLabelParams.
mColumnNameIdx + 1);
// Append empty rows until the target row exists.
895 while ((rowIdx + 1) > GetDataRowCount())
897 std::vector<std::string> row;
898 row.resize(GetDataColumnCount());
899 mData.push_back(row);
// Widen every row when pRow has more values than the table has columns;
// the new width leaves room for the row-label column.
902 if (pRow.size() > GetDataColumnCount())
904 for (
auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
906 itRow->resize(pRow.size() + (mLabelParams.
mRowNameIdx + 1));
// Convert each value to text. NOTE(review): the store into mData is on lines not visible here.
911 for (
auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol)
914 converter.
ToStr(*itCol, str);
// Writes pRow into the row identified by name.
// Throws std::out_of_range with "row not found: <name>".
// NOTE(review): the lookup producing rowIdx and the condition guarding the throw are not visible here.
925 void SetRow(
const std::string& pRowName,
const std::vector<T>& pRow)
930 throw std::out_of_range(
"row not found: " + pRowName);
// Delegate to the index-based overload (returning its void result).
932 return SetRow<T>(rowIdx, pRow);
// Fragment that removes one row (function signature not visible here).
// Shift the caller's index past the column-label row, then erase that row.
// NOTE(review): no bounds check is visible; erase() with an iterator past the end is
// undefined behavior - confirm callers guarantee a valid index.
941 const ssize_t rowIdx = pRowIdx + (mLabelParams.
mColumnNameIdx + 1);
942 mData.erase(mData.begin() + rowIdx);
// Error path of a by-name row operation (enclosing function not visible here):
// reports the name that could not be resolved.
954 throw std::out_of_range(
"row not found: " + pRowName);
// Inserts a new row before position pRowIdx.
// pRowIdx: position relative to the first non-label row.
// pRow: values for the new row (defaults to empty).
// pRowName: optional label (defaults to empty).
967 void InsertRow(
const size_t pRowIdx,
const std::vector<T>& pRow = std::vector<T>(),
968 const std::string& pRowName = std::string())
// Shift the caller's index past the column-label row.
970 const size_t rowIdx = pRowIdx + (mLabelParams.
mColumnNameIdx + 1);
// Text form of the new row.
972 std::vector<std::string> row;
// Two alternative sizings follow: match the current column count, or fit the row-label
// column plus every supplied value. NOTE(review): the condition choosing between them is not visible here.
975 row.resize(GetDataColumnCount());
979 row.resize(pRow.size() + (mLabelParams.
mRowNameIdx + 1));
// Convert each supplied value to text (the store into row is on lines not visible here).
981 for (
auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol)
984 converter.
ToStr(*itCol, str);
// Pad with empty rows so that the insert position is not beyond the end of the table.
989 while (rowIdx > GetDataRowCount())
991 std::vector<std::string> tempRow;
992 tempRow.resize(GetDataColumnCount());
993 mData.push_back(tempRow);
996 mData.insert(mData.begin() + rowIdx, row);
// A label is only applied when one was supplied (the statement doing so is not visible here).
998 if (!pRowName.empty())
// Fragment computing a row count (function signature not visible here):
// total rows in mData minus the column-label offset (mColumnNameIdx + 1), never negative.
1010 const ssize_t count =
static_cast<ssize_t
>(mData.size()) - (mLabelParams.
mColumnNameIdx + 1);
1011 return (count >= 0) ? count : 0;
// Returns the cell at (pColumnIdx, pRowIdx), converted to T.
// Both indices are relative to the first non-label column/row.
// The at() calls throw std::out_of_range when either index is outside the table.
1020 template<
typename T>
1021 T
GetCell(
const size_t pColumnIdx,
const size_t pRowIdx)
const
// Shift both indices past the label column/row.
1023 const ssize_t columnIdx = pColumnIdx + (mLabelParams.
mRowNameIdx + 1);
1024 const ssize_t rowIdx = pRowIdx + (mLabelParams.
mColumnNameIdx + 1);
1028 converter.
ToVal(mData.at(rowIdx).at(columnIdx), val);
// Fragment of a cell getter that uses the caller-supplied converter pToVal
// (function signature not visible here).
1039 template<
typename T>
// Shift both indices past the label column/row.
1042 const ssize_t columnIdx = pColumnIdx + (mLabelParams.
mRowNameIdx + 1);
1043 const ssize_t rowIdx = pRowIdx + (mLabelParams.
mColumnNameIdx + 1);
// The at() calls throw std::out_of_range when either index is outside the table.
1046 pToVal(mData.at(rowIdx).at(columnIdx), val);
// Returns the cell identified by column name and row name, converted to T.
// Throws std::out_of_range with "column not found: <name>" or "row not found: <name>".
// NOTE(review): the column lookup and the conditions guarding both throws are not visible here.
1056 template<
typename T>
1057 T
GetCell(
const std::string& pColumnName,
const std::string& pRowName)
const
1062 throw std::out_of_range(
"column not found: " + pColumnName);
1065 const ssize_t rowIdx =
GetRowIdx(pRowName);
1068 throw std::out_of_range(
"row not found: " + pRowName);
// Delegate to the index-based overload.
1071 return GetCell<T>(columnIdx, rowIdx);
// Fragment of a by-name cell getter that takes a custom converter (signature not visible here).
// Throws std::out_of_range with "column not found: <name>" or "row not found: <name>",
// otherwise delegates to the index-based overload with pToVal.
1081 template<
typename T>
1087 throw std::out_of_range(
"column not found: " + pColumnName);
1090 const ssize_t rowIdx =
GetRowIdx(pRowName);
1093 throw std::out_of_range(
"row not found: " + pRowName);
1096 return GetCell<T>(columnIdx, rowIdx, pToVal);
// Returns the cell identified by column name and row index, converted to T.
// Throws std::out_of_range with "column not found: <name>".
// NOTE(review): the lookup producing columnIdx and the condition guarding the throw are not visible here.
1105 template<
typename T>
1106 T
GetCell(
const std::string& pColumnName,
const size_t pRowIdx)
const
1111 throw std::out_of_range(
"column not found: " + pColumnName);
// Delegate to the index-based overload.
1114 return GetCell<T>(columnIdx, pRowIdx);
// Fragment of a (column name, row index) cell getter that takes a custom converter
// (signature not visible here): throws std::out_of_range with "column not found: <name>",
// otherwise delegates to the index-based overload with pToVal.
1124 template<
typename T>
1130 throw std::out_of_range(
"column not found: " + pColumnName);
1133 return GetCell<T>(columnIdx, pRowIdx, pToVal);
// Returns the cell identified by column index and row name, converted to T.
// Throws std::out_of_range with "row not found: <name>".
// NOTE(review): the condition guarding the throw is not visible here.
1142 template<
typename T>
1143 T
GetCell(
const size_t pColumnIdx,
const std::string& pRowName)
const
1145 const ssize_t rowIdx =
GetRowIdx(pRowName);
1148 throw std::out_of_range(
"row not found: " + pRowName);
// Delegate to the index-based overload.
1151 return GetCell<T>(pColumnIdx, rowIdx);
// Fragment of a (column index, row name) cell getter that takes a custom converter
// (signature not visible here): resolves the row name, throws std::out_of_range with
// "row not found: <name>", otherwise delegates to the index-based overload with pToVal.
1161 template<
typename T>
1164 const ssize_t rowIdx =
GetRowIdx(pRowName);
1167 throw std::out_of_range(
"row not found: " + pRowName);
1170 return GetCell<T>(pColumnIdx, rowIdx, pToVal);
// Stores pCell at (pColumnIdx, pRowIdx), growing the table as needed.
// Both indices are relative to the first non-label column/row.
1179 template<
typename T>
1180 void SetCell(
const size_t pColumnIdx,
const size_t pRowIdx,
const T& pCell)
// Shift both indices past the label column/row.
1182 const size_t columnIdx = pColumnIdx + (mLabelParams.
mRowNameIdx + 1);
1183 const size_t rowIdx = pRowIdx + (mLabelParams.
mColumnNameIdx + 1);
// Append empty rows until the target row exists.
1185 while ((rowIdx + 1) > GetDataRowCount())
1187 std::vector<std::string> row;
1188 row.resize(GetDataColumnCount());
1189 mData.push_back(row);
// Widen every row when the target column lies beyond the current width.
1192 if ((columnIdx + 1) > GetDataColumnCount())
1194 for (
auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
1196 itRow->resize(columnIdx + 1);
// Convert to text and store.
1202 converter.
ToStr(pCell, str);
1203 mData.at(rowIdx).at(columnIdx) = str;
// Stores pCell at the cell identified by column name and row name.
// Throws std::out_of_range with "column not found: <name>" or "row not found: <name>".
// NOTE(review): the column lookup and the conditions guarding both throws are not visible here.
1212 template<
typename T>
1213 void SetCell(
const std::string& pColumnName,
const std::string& pRowName,
const T& pCell)
1218 throw std::out_of_range(
"column not found: " + pColumnName);
1221 const ssize_t rowIdx =
GetRowIdx(pRowName);
1224 throw std::out_of_range(
"row not found: " + pRowName);
// Delegate to the index-based overload.
1227 SetCell<T>(columnIdx, rowIdx, pCell);
// Fragment of a column-label accessor (function signature not visible here).
// Shift the caller's index past the row-label column.
1237 const ssize_t columnIdx = pColumnIdx + (mLabelParams.
mRowNameIdx + 1);
// Error path: per its message, raised when mColumnNameIdx is negative
// (the guarding condition is on a line not visible here).
1240 throw std::out_of_range(
"column name row index < 0: " + std::to_string(mLabelParams.
mColumnNameIdx));
// Fragment that assigns a column label (function signature not visible here).
// Shift the caller's index past the row-label column and record name -> position in the lookup map.
1253 const ssize_t columnIdx = pColumnIdx + (mLabelParams.
mRowNameIdx + 1);
1254 mColumnNames[pColumnName] = columnIdx;
// Error path: per its message, raised when mColumnNameIdx is negative
// (the guarding condition is on a line not visible here).
1257 throw std::out_of_range(
"column name row index < 0: " + std::to_string(mLabelParams.
mColumnNameIdx));
// Grow the table so the label row exists. NOTE(review): rowIdx is declared on a line not visible here.
1262 if (rowIdx >=
static_cast<int>(mData.size()))
1264 mData.resize(rowIdx + 1);
// Grow the label row so the target cell exists.
1266 auto& row = mData[rowIdx];
1267 if (columnIdx >=
static_cast<int>(row.size()))
1269 row.resize(columnIdx + 1);
// Write the label text into the label row.
1272 mData.at(mLabelParams.
mColumnNameIdx).at(columnIdx) = pColumnName;
// Fragment returning column labels (function signature not visible here):
// builds a vector from the label row starting at an offset that continues on a line not
// visible here; the alternative path returns an empty vector.
1283 return std::vector<std::string>(mData.at(mLabelParams.
mColumnNameIdx).begin() +
1288 return std::vector<std::string>();
// Fragment of a row-label accessor (function signature not visible here).
// Shift the caller's index past the column-label row.
1298 const ssize_t rowIdx = pRowIdx + (mLabelParams.
mColumnNameIdx + 1);
// Error path: per its message, raised when mRowNameIdx is negative
// (the guarding condition is on a line not visible here).
1301 throw std::out_of_range(
"row name column index < 0: " + std::to_string(mLabelParams.
mRowNameIdx));
// The label is the cell in the row-label column; at() throws std::out_of_range if out of bounds.
1304 return mData.at(rowIdx).at(mLabelParams.
mRowNameIdx);
// Fragment that assigns a row label (function signature not visible here).
// Shift the caller's index past the column-label row and record name -> position in the lookup map.
1314 const ssize_t rowIdx = pRowIdx + (mLabelParams.
mColumnNameIdx + 1);
1315 mRowNames[pRowName] = rowIdx;
// Error path: per its message, raised when mRowNameIdx is negative
// (the guarding condition is on a line not visible here).
1318 throw std::out_of_range(
"row name column index < 0: " + std::to_string(mLabelParams.
mRowNameIdx));
// Grow the table so the target row exists.
1322 if (rowIdx >=
static_cast<int>(mData.size()))
1324 mData.resize(rowIdx + 1);
// Check whether the row is wide enough to hold the label cell
// (the statement run when it is not is on a line not visible here).
1326 auto& row = mData[rowIdx];
1327 if (mLabelParams.
mRowNameIdx >=
static_cast<int>(row.size()))
// Write the label text into the row-label column.
1332 mData.at(rowIdx).at(mLabelParams.
mRowNameIdx) = pRowName;
// Fragment returning row labels (function signature not visible here):
// collects the cell in the row-label column from the rows of mData.
// NOTE(review): any guard on mRowNameIdx or skipping of label rows is on lines not visible here.
1341 std::vector<std::string> rownames;
1344 for (
auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
1348 rownames.push_back(itRow->at(mLabelParams.
mRowNameIdx));
// Fragment that opens the file at mPath for reading (function signature not visible here).
// Exceptions are enabled for failbit and badbit before open(), so a failed open throws
// instead of silently yielding a bad stream; binary mode avoids newline translation.
1358 std::ifstream stream;
1359 stream.exceptions(std::ifstream::failbit | std::ifstream::badbit);
1360 stream.open(mPath, std::ios::binary);
// Reads CSV content from pStream, detecting a byte-order mark first.
// NOTE(review): several guards (if/else, preprocessor conditions) are on lines not visible
// here, so the comments below describe the visible statements only.
1364 void ReadCsv(std::istream& pStream)
// Measure the stream length by seeking to the end, then rewind. Requires a seekable stream.
1367 pStream.seekg(0, std::ios::end);
1368 std::streamsize length = pStream.tellg();
1369 pStream.seekg(0, std::ios::beg);
// Peek at the first two bytes and rewind again.
1372 std::vector<char> bom2b(2,
'\0');
1375 pStream.read(bom2b.data(), 2);
1376 pStream.seekg(0, std::ios::beg);
// UTF-16 byte-order marks: FF FE (little endian) and FE FF (big endian).
1379 static const std::vector<char> bomU16le = {
'\xff',
'\xfe' };
1380 static const std::vector<char> bomU16be = {
'\xfe',
'\xff' };
1381 if ((bom2b == bomU16le) || (bom2b == bomU16be))
// Remember the detected byte order.
1384 mIsLE = (bom2b == bomU16le);
// The UTF-16 path reopens the file by mPath as a wide stream rather than reading pStream.
1386 std::wifstream wstream;
1387 wstream.exceptions(std::wifstream::failbit | std::wifstream::badbit);
1388 wstream.open(mPath, std::ios::binary);
// Two alternative locales follow, both consuming the BOM: the first decodes little-endian
// UTF-16, the second uses the facet's default byte order.
1391 wstream.imbue(std::locale(wstream.getloc(),
1392 new std::codecvt_utf16<
wchar_t, 0x10ffff,
1393 static_cast<std::codecvt_mode
>(std::consume_header |
1394 std::little_endian)>));
1398 wstream.imbue(std::locale(wstream.getloc(),
1399 new std::codecvt_utf16<
wchar_t, 0x10ffff,
1400 std::consume_header>));
// Slurp the wide stream, convert it to UTF-8 and parse that in-memory copy.
1402 std::wstringstream wss;
1403 wss << wstream.rdbuf();
1404 std::string utf8 = ToString(wss.str());
1405 std::stringstream ss(utf8);
1406 ParseCsv(ss, utf8.size());
// Non-UTF-16 path: read the first three bytes to look for the UTF-8 BOM (EF BB BF).
1414 std::vector<char> bom3b(3,
'\0');
1415 pStream.read(bom3b.data(), 3);
1416 static const std::vector<char> bomU8 = {
'\xef',
'\xbb',
'\xbf' };
// Rewind to the start (the condition guarding this rewind is not visible here).
1420 pStream.seekg(0, std::ios::beg);
1429 ParseCsv(pStream, length);
// Parses CSV text from pStream into mData and rebuilds the label lookup maps.
// pStream: source of bytes; p_FileLength: number of bytes to consume.
// NOTE(review): the declarations of cell, cr and lf and several branch bodies are on lines
// not visible here.
1433 void ParseCsv(std::istream& pStream, std::streamsize p_FileLength)
// Input is consumed in chunks of at most 64 KiB.
1435 const std::streamsize bufLength = 64 * 1024;
1436 std::vector<char> buffer(bufLength);
1437 std::vector<std::string> row;
// Starts outside a quoted cell.
1439 bool quoted =
false;
1443 while (p_FileLength > 0)
1445 std::streamsize readLength = std::min<std::streamsize>(p_FileLength, bufLength);
1446 pStream.read(buffer.data(), readLength);
// readLength never exceeds bufLength, so the int index cannot overflow.
1447 for (
int i = 0; i < readLength; ++i)
// Quote character: special-cased when the cell is empty or itself began with a quote.
1449 if (buffer[i] ==
'"')
1451 if (cell.empty() || cell[0] ==
'"')
// Separator: the finished cell is trimmed, then unquoted, then appended to the row.
1457 else if (buffer[i] == mSeparatorParams.
mSeparator)
1461 row.push_back(Unquote(Trim(cell)));
// Carriage return.
1469 else if (buffer[i] ==
'\r')
// Line feed: the finished cell and then the finished row are stored.
1480 else if (buffer[i] ==
'\n')
1495 row.push_back(Unquote(Trim(cell)));
1504 mData.push_back(row);
1518 p_FileLength -= readLength;
// Flush a final row that was not terminated by a line break.
1522 if (!cell.empty() || !row.empty())
1524 row.push_back(Unquote(Trim(cell)));
1526 mData.push_back(row);
// Treat the input as CRLF when carriage returns outnumber half of the line feeds.
1531 mSeparatorParams.
mHasCR = (cr > (lf / 2));
// Column labels: requires the label row to exist, then maps each name to its position.
1535 (
static_cast<ssize_t
>(mData.size()) > mLabelParams.
mColumnNameIdx))
1540 mColumnNames[columnName] = i++;
// Row labels: maps the label cell of each sufficiently wide row to its position.
1546 (
static_cast<ssize_t
>(mData.size()) >
1550 for (
auto& dataRow : mData)
1552 if (
static_cast<ssize_t
>(dataRow.size()) > mLabelParams.
mRowNameIdx)
1554 mRowNames[dataRow[mLabelParams.
mRowNameIdx]] = i++;
// Writes the document to the file at mPath.
// NOTE(review): the conditions choosing between the wide and narrow paths, and the calls
// that fill the streams, are on lines not visible here.
1560 void WriteCsv()
const
// Wide path: render into memory first, then convert the UTF-8 text to a wide string.
1565 std::stringstream ss;
1567 std::string utf8 = ss.str();
1568 std::wstring wstr = ToWString(utf8);
// Open the target truncating any existing content; open/write failures throw.
1570 std::wofstream wstream;
1571 wstream.exceptions(std::wofstream::failbit | std::wofstream::badbit);
1572 wstream.open(mPath, std::ios::binary | std::ios::trunc);
// Two alternative encoders follow: little-endian UTF-16, or the facet's default byte order.
1576 wstream.imbue(std::locale(wstream.getloc(),
1577 new std::codecvt_utf16<
wchar_t, 0x10ffff,
1578 static_cast<std::codecvt_mode
>(std::little_endian)>));
1582 wstream.imbue(std::locale(wstream.getloc(),
1583 new std::codecvt_utf16<wchar_t, 0x10ffff>));
// Emit the byte-order mark (U+FEFF) ahead of the content.
1586 wstream << static_cast<wchar_t>(0xfeff);
// Narrow path: plain byte stream, truncating any existing content; failures throw.
1592 std::ofstream stream;
1593 stream.exceptions(std::ofstream::failbit | std::ofstream::badbit);
1594 stream.open(mPath, std::ios::binary | std::ios::trunc);
// Serializes mData to pStream, one line per row.
// NOTE(review): the start of the quoting condition and the unquoted/separator output are
// on lines not visible here.
1599 void WriteCsv(std::ostream& pStream)
const
1601 for (
auto itr = mData.begin(); itr != mData.end(); ++itr)
1603 for (
auto itc = itr->begin(); itc != itr->end(); ++itc)
// Visible part of the quoting condition: the cell contains the separator or a space.
1606 ((itc->find(mSeparatorParams.
mSeparator) != std::string::npos) ||
1607 (itc->find(
' ') != std::string::npos)))
// Escape embedded quotes by doubling them, then wrap the cell in quotes.
1610 std::string str = *itc;
1611 ReplaceString(str,
"\"",
"\"\"");
1613 pStream <<
"\"" << str <<
"\"";
// Line ending follows the mHasCR setting.
1625 pStream << (mSeparatorParams.
mHasCR ?
"\r\n" :
"\n");
// Returns the raw number of rows stored in mData, label rows included.
1629 size_t GetDataRowCount()
const
1631 return mData.size();
// Returns the raw width of the table, taken from the first row only (0 when there are no rows).
// Rows of a different width are not considered.
1634 size_t GetDataColumnCount()
const
1636 return (mData.size() > 0) ? mData.at(0).size() : 0;
// Strips leading and trailing whitespace from pStr when mTrim is enabled.
// NOTE(review): the return statements (including the mTrim-disabled path) are on lines not visible here.
1639 std::string Trim(
const std::string& pStr)
1641 if (mSeparatorParams.
mTrim)
// Work on a copy so the input stays untouched.
1643 std::string str = pStr;
// Drop everything before the first non-space character.
// NOTE(review): isspace() is called with a char widened to int; for negative char values
// (non-ASCII bytes where char is signed) that is undefined behavior - consider casting
// through unsigned char.
1646 str.erase(str.begin(), std::find_if(str.begin(), str.end(), [](
int ch) { return !isspace(ch); }));
// Drop everything after the last non-space character (reverse search, .base() converts back).
1649 str.erase(std::find_if(str.rbegin(), str.rend(), [](
int ch) { return !isspace(ch); }).base(), str.end());
// Removes one pair of enclosing double quotes from pStr and collapses doubled quotes inside.
// Only applies when mAutoQuote is enabled and pStr is at least two characters long,
// starting and ending with '"'.
// NOTE(review): the return statements (including the not-quoted path) are on lines not visible here.
1659 std::string Unquote(
const std::string& pStr)
1661 if (mSeparatorParams.
mAutoQuote && (pStr.size() >= 2) && (pStr.front() ==
'"') && (pStr.back() ==
'"'))
// Strip the first and last character.
1664 std::string str = pStr.substr(1, pStr.size() - 2);
// Each escaped pair "" becomes a single ".
1667 ReplaceString(str,
"\"\"",
"\"");
1678 #if defined(_MSC_VER)
1679 #pragma warning (disable: 4996)
// Converts a wide string to a UTF-8 encoded narrow string via std::wstring_convert.
// to_bytes() throws std::range_error when the conversion fails.
1681 static std::string ToString(
const std::wstring& pWStr)
1683 return std::wstring_convert<std::codecvt_utf8<wchar_t>,
wchar_t>{ }.to_bytes(pWStr);
// Converts a UTF-8 encoded narrow string to a wide string via std::wstring_convert.
// from_bytes() throws std::range_error when the conversion fails.
1686 static std::wstring ToWString(
const std::string& pStr)
1688 return std::wstring_convert<std::codecvt_utf8<wchar_t>,
wchar_t>{ }.from_bytes(pStr);
1690 #if defined(_MSC_VER)
1691 #pragma warning (default: 4996)
// Replaces every occurrence of pSearch in pStr with pReplace, in place.
// pStr: string to modify; pSearch: text to find; pReplace: replacement text.
// NOTE(review): the declaration of pos is on a line not visible here.
// NOTE(review): an empty pSearch looks like it would never terminate, because find() matches
// at every position; the callers visible in this file only pass quote literals.
1695 static void ReplaceString(std::string& pStr,
const std::string& pSearch,
const std::string& pReplace)
1699 while ((pos = pStr.find(pSearch, pos)) != std::string::npos)
1701 pStr.replace(pos, pSearch.size(), pReplace);
// Resume after the inserted text so the replacement itself is never rescanned.
1702 pos += pReplace.size();
// --- Document state ---
// Settings objects copied in by the constructors and Load overloads.
1708 LabelParams mLabelParams;
1709 SeparatorParams mSeparatorParams;
1710 ConverterParams mConverterParams;
1711 LineReaderParams mLineReaderParams;
// Raw table: every cell as text, label row/column included.
1712 std::vector<std::vector<std::string>> mData;
// Lookup from label text to raw position in mData.
1713 std::map<std::string, size_t> mColumnNames;
1714 std::map<std::string, size_t> mRowNames;
// Defaults to false. NOTE(review): presumably set when a UTF-16 BOM is detected on read - confirm.
1716 bool mIsUtf16 =
false;