Wednesday, August 04, 2021

parse csv line by line

Opening csv files in Google Sheets and then processing the data using Google Apps Script has the limitation of 50k characters per cell in Google Sheets. The following code snippet gets around this by loading the csv data to memory and parsing line by line.

var entirecsvfilestring = csvfile.getBlob().getDataAsString();
  Logger.log('Loaded the entire csv to memory.');
  // https://stackoverflow.com/questions/21711768/split-string-in-javascript-and-detect-line-break
  var numberOfLineBreaks = (entirecsvfilestring.match(/\n/g)||[]).length;
  Logger.log('Number of line-breaks: ' + numberOfLineBreaks);
  var splitlines = entirecsvfilestring.split("\n");
  var data_range = [];
  var data;

  for (var linei = 1; linei < splitlines.length; linei++) {
    // skipping the first line, linei=0
    if (splitlines[linei].length < 25) {
      continue;
      // don't create the asset if the line is too short to contain relevant data.
      // This can happen if there are blank lines at the end of the csv.
    }
    data = parseCsv(splitlines[linei], ',');
    Logger.log('Parsed line %s', linei);
    .... etc
    
// https://gist.github.com/simonjamain/7e23b898527655609e5ff012f412dd50
function parseCsv(csvString, delimiter) {
  var sanitizedString = csvString.replace(/(["'])(?:(?=(\\?))\2[\s\S])*?\1/g, function(e){return e.replace(/\r?\n|\r/g, ' ') });
  return Utilities.parseCsv(sanitizedString, delimiter)
}
    
    

No comments:

Post a Comment