I have image elements in my DOM that use blob URL in their src attribute. Think like this:
<img class="full-media opacity-transition slow open shown" width="432"
height="432" alt="" draggable="true"
src="blob:https://web.telegram.org/1c037f99-5201-45de-937c-548da5706021">
What I want is to be able to do some ocr on this image , however fetching the full url or even without the blob gives me a 404. How can I get the image either as a file or any acceptable format for tesseract.recognize?
EDIT
I've gotten the blob object and converted it to base64 in order to use it with tesseract on the node server side. This is what I tried so far: client side:
let imgOcr='blob:https://web.telegram.org/aab754ff-1bec-40bc-b55f-e2b08bf59ccb';
const blob = await fetch(imgOcr).then(res => res.blob());
var xj = new XMLHttpRequest();
xj.open("POST", "http://localhost:3000/imgocr", true);
xj.setRequestHeader('Content-type','application/x-www-form-urlencoded');
xj.send(blob);
xj.onreadystatechange = function () { console.log(xj.responseText); }
server side:
// Configuring body parser middleware
app.use(bodyParser.urlencoded({ extended: false }));
var upperBound = '1gb';
app.use(bodyParser.urlencoded({extended: false, limit: upperBound}));
//test for image ocr
app.post('/imgocr',(req,res)=> {
console.dir(req.body);
var reader = new FileReader();
reader.readAsDataURL(req.body);
reader.onloadend = function () {
var base64String = reader.result;
console.log('Base64 String - ', base64String);
console.log("tEsseract startng");
Tesseract.recognize(base64String,'eng',{
logger:m =>console.log("PROGRESS: "+m)
}).then(({ data:{text}})=>{
console.log("tESS RES: "+text);
})
}
});
but I get this error and I don't understand what am I doing wrong here:
cannot read as File: {"����\u0000\u0010JFIF\u0000\u0001\u0001\u0001\u0000H\u0000H\u0000\u0000��\u0000C\u0000\u0004\u0003\u0003\u0004\u0003\u0003\u0004\u0004\u0003\u0004\u0005\u0004\u0004\u0005\u0006\n\u0007\u0006\u0006\u0006\u0006\r\t\n\b\n\u00\u001e.ߒ}h�v���S�F�f�tz\u001d�A���'֏\u0017o�>�>�jFis\u000e�C��<]�$�����\nm]�5#4���y\u0011x�:ő���K�.���]\u001a�e\u0010�#�H`b�U��U�#<����Q\u0007\u0001�x,S]\u0004�<�Ѩ�Q\u000f\u0005 at readFile (/Users/user/node_modules/filereader/FileReader.js:266:15) at FileReader.self.readAsDataURL (/Users/user/node_modules/filereader/FileReader.js:292:7) at /Users/user/Desktop/nodeServer/app.js:24:12
puppeteerto take the screenshot of thatimgelement and save the screenshot on disk. Once you have the screenshot on disk, you can perform OCR on it.