This SO link gives two techniques - using data URIs and using a helper function to fetch the image from some storage location.
Edit: My initial implementation used the simpler data URI method. Later, I also implemented the other method.
Initially,
// to prevent images from being dropped due to timeouts, we'll encode them inline
// https://ads-developers.googleblog.com/2013/11/how-to-send-pdf-reports-with-adwords.html
// to parse the img src, we'll use regex, since the html is of limited scope
// https://stackoverflow.com/questions/14939296/extract-image-src-from-a-string/15013465
// https://support.google.com/a/answer/1346938
// Inline every ticketed image URL found in contentdata as a base64 data URI.
//
// Reads:  contentdata (HTML string), currentTicket (ticket string that ends each
//         image URL), data_array[i][0] (asset id, used for logging only).
// Writes: contentdata (each matched URL swapped for its data URI) and
//         imageurls (every matched URL, in document order).
//
// imageurls, regexstring and myregex are assigned without a declaration,
// exactly as before — presumably they are declared elsewhere; verify.
imageurls = [];
// Escape regex metacharacters so the ticket is matched literally and cannot
// change the meaning of the pattern.
var escapedTicket = String(currentTicket).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
regexstring = '(http\\S*' + escapedTicket + ')'; // the \ is escaped, so it becomes doubled.
myregex = new RegExp(regexstring, "gi");
var imageindex = 1;
// One replace() pass with a callback substitutes each match exactly where it
// was found. Looping with exec() while rewriting contentdata leaves the
// regex's lastIndex pointing into the *old* string, which can skip a URL
// whenever a replacement is shorter than the URL it replaces.
contentdata = contentdata.replace(myregex, function (imageurl) {
  imageurls.push(imageurl);
  Logger.log('%s of assetid %s', imageindex.toString(), data_array[i][0].toString());
  imageindex = imageindex + 1;

  // Pick the data-URI prefix from the file extension, ignoring case
  // (.JPG, .Gif, .TIFF ...). Anything unrecognised is treated as PNG.
  var imagemimetypeandenc = 'data:image/png;base64,'; // default
  if (/\.jpe?g\?ticket/i.test(imageurl)) {
    imagemimetypeandenc = 'data:image/jpeg;base64,';
  } else if (/\.gif\?ticket/i.test(imageurl)) {
    imagemimetypeandenc = 'data:image/gif;base64,';
  } else if (/\.tiff?\?ticket/i.test(imageurl)) {
    imagemimetypeandenc = 'data:image/tiff;base64,';
  }

  // muteHttpExceptions: a single missing image must not abort the whole run.
  var response = UrlFetchApp.fetch(imageurl, { muteHttpExceptions: true });
  if (response.getResponseCode() !== 200) {
    // Keep the plain link rather than embedding an error page as image data.
    Logger.log('Could not download %s (HTTP %s); leaving the original URL in place',
      imageurl, response.getResponseCode().toString());
    return imageurl;
  }

  var base64EncodedBytes = Utilities.base64Encode(response.getBlob().getBytes());
  return imagemimetypeandenc + base64EncodedBytes;
});
and later, when images were available on Google Drive,
// Base64 text used as the replacement for image URLs further down.
var placeholder = '_a_few_kb_of_chars_QAAAABJRU5ErkJggg=='; // made with https://www.w3docs.com/tools/image-base64
.....
// Second variant: scan contentdata for <img src="http..."> URLs and, for the
// ones served by Alfresco (they contain 'ticket='), download the image through
// a helper endpoint (returnUrl + file name) instead of fetching it directly.
// NOTE(review): this excerpt is truncated — the closing brace of the while
// loop below is not part of it.
var m;
imageurls = [];
regexstring = 'img\\s*src\\s*=\\s*"(http\\S*)"'; // the \ is escaped, so it becomes doubled.
// Only the http part is captured, within parenthesis
// currently it matches only if the img src is within double quotes. "https://whatever"
//Logger.log(regexstring);
myregex = new RegExp(regexstring, "gi");
var imageindex=1;
// NOTE(review): contentdata is rewritten inside this loop while myregex (a
// stateful /g regex) keeps its lastIndex from the previous version of the
// string — confirm that no match can be skipped or revisited.
while ( m = myregex.exec( contentdata ) ) {
imageurls.push( m[1] );
// Progress log: running image counter and the asset id taken from data_array[i][0].
Logger.log('%s of assetid %s',imageindex.toString(),data_array[i][0].toString());
imageindex = imageindex + 1;
// here we use a helper if the url contains the alfresco url
// NOTE(review): `var response;` has no initializer, so it does not reset the
// variable on each iteration. For a URL without 'ticket=' it is either still
// undefined (first iteration — getResponseCode() below would throw) or still
// holds the previous image's response. Confirm whether a branch for
// non-Alfresco URLs was left out of this excerpt.
var response;
if (m[1].includes('ticket=') ) {
//use the helper function after extracting the filename
Logger.log('Downloading %s using Alfresco helper', m[1])
// Path segment 9 (0-based, splitting on '/') is the file name for URLs shaped
// like the example below; other URL shapes would yield a different segment.
var fname = m[1].split('/')[9]; // from the url similar to
// https://www.ourdomain.tld/alfresco/d/direct/workspace/NameOfStore/f8b56b7c-long-id-a35c30be756a/filename-15092020100930PM?ticket=
// Drop the query string ("?ticket=...") so only the bare file name remains.
fname = fname.split('?')[0];
// handle cases where fname does not have extension
// NOTE(review): no such handling is visible in this excerpt.
// NOTE(review): the first fetch returns a response object, which is then
// passed as the URL of the second fetch — presumably the helper's response
// body (getContentText()) is the download URL; confirm.
var urlFromHelper = UrlFetchApp.fetch(returnUrl+encodeURIComponent(fname));
// muteHttpExceptions lets a non-200 reply reach the status check below
// instead of throwing.
response = UrlFetchApp.fetch(urlFromHelper, { muteHttpExceptions: true });
}
if (response.getResponseCode()==200) {
var imageBlob = response.getBlob();
var base64EncodedBytes = Utilities.base64Encode(imageBlob.getBytes());
//var ctype = response.getHeaders().Content-Type
// Wrap the headers object in a Map so 'Content-Type' (which contains a
// hyphen) can be looked up with get().
var headermap = new Map(Object.entries(response.getHeaders()));
var mimetypeofim = headermap.get('Content-Type');
var imagemimetypeandenc = 'data:'+mimetypeofim+';base64,';
var imageencoded = imagemimetypeandenc + base64EncodedBytes;
// NOTE(review): imageencoded is built above but never used; the URL is
// replaced with `placeholder` instead. Confirm whether imageencoded was meant
// here, with placeholder reserved for failed downloads.
contentdata = contentdata.replace(m[1], placeholder);
}
In cases where we do not want the images to be base64-encoded (for example, large images, which could cause the function to run out of memory), the whole code block above is not needed, since the img src link in the original HTML works as it is.
No comments:
Post a Comment