0

I have an error when I try to predict with an LSTM model in React with TensorFlow.js. The purpose of the React project is to translate sign language and I use media pipe holistic to detect key points of the face, hands and pose. To extract the key point I create a function that uses the module "@d4c/numjs" to generate an Ndarray. With that, I generate an array that allows me to count frames and predict, but when I try to predict this error appears:

ERROR Error when checking model: the Array of Tensors that you are passing to your model is not the size the model expected. I expected to see 1 Tensor(s), but instead got 0 Tensor(s).

ValueError@http://localhost:3000/static/js/bundle.js:126537:5 checkInputData@http://localhost:3000/static/js/bundle.js:124254:13 predict@http://localhost:3000/static/js/bundle.js:124844:19 predict@http://localhost:3000/static/js/bundle.js:138492:23 onResults@http://localhost:3000/static/js/bundle.js:152:27 ./node_modules/@mediapipe/holistic/holistic.js/</Vd/onResults/a.I<@http://localhost:3000/static/js/bundle.js:7788:15

I don't know if the problem is that I use the module "@d4c/numjs" for the project, or if I'm missing a reshape of some sort. The LSTM model's input layer is: lstm_input (InputLayer) [[null,15,1662]]

The error occurs in these particular lines:

 kp_sequence.push(extractKeyPoints(results));

    if (kp_sequence.length > 15 && thereHand(results)) {
      count_frame++;
    } else {
        if (count_frame >= 5) {
             const kpArray = nj.stack(kp_sequence.slice(-15)).reshape([1, 15, -1]);
             const res = model.predict(kpArray).tolist()[0];

            if (res.indexOf(Math.max(...res)) > 0.7) {
              console.log("prediciendo");
              const sent = actions[res.indexOf(Math.max(...res))];
              sentence.unshift(sent);
              [sentence, repe_sent] = formatSentences(sent, sentence, repe_sent);
              } 
            count_frame = 0;
            kp_sequence = [];
        }
    }

I posted the full code below, I hope you can help me with this one, I'm new to JavaScript. Thanks

import * as tf from '@tensorflow/tfjs';
import './App.css';
import React, {useEffect, useRef} from 'react';
import Webcam from "react-webcam";
import * as cam from "@mediapipe/camera_utils";
import {FACEMESH_TESSELATION,HAND_CONNECTIONS,POSE_CONNECTIONS, Holistic} from '@mediapipe/holistic';
import './App.css';
import nj from "@d4c/numjs";  
import { log } from '@d4c/numjs/build/main/lib';

function App() {
  let model = undefined;
  var count_frame=0;
  var repe_sent=1;
  var kp_sequence=[];
  var sentence = [];

  const webcamRef = useRef(null);
  const canvasRef = useRef(null);
  const connect = window.drawConnectors;
  var camera = null;

  async function loadModel(){

    model = await tf.loadLayersModel('url');
    model.summary();
  }
  loadModel();


function formatSentences(sent, sentence, repe_sent) {
    if (sentence.length > 1) {
        if (sentence[1].includes(sent)) {
          repe_sent++;
            sentence.shift();
            sentence[0] = `${sent} (x${repe_sent})`;
        } else {
          repe_sent = 1;
        }
    }
    return [sentence, repe_sent];
}


  function thereHand(results) {
    return !!results.leftHandLandmarks || !!results.rightHandLandmarks;
}

  function extractKeyPoints(results) {
    let pose = [];

    if (results.poseLandmarks) {
        pose = nj.array(results.poseLandmarks.map(res => [res.x, res.y, res.z, res.visibility])).flatten().tolist();
    } else {
        pose = nj.zeros(33 * 4).tolist();
    }

    let face = [];
    if (results.faceLandmarks) {
        face = nj.array(results.faceLandmarks.map(res => [res.x, res.y, res.z])).flatten().tolist();
    } else {
        face = nj.zeros(468 * 3).tolist();
    }

    let lh = [];
    if (results.leftHandLandmarks) {
        lh = nj.array(results.leftHandLandmarks.map(res => [res.x, res.y, res.z])).flatten().tolist();
    } else {
        lh = nj.zeros(21 * 3).tolist();
    }

    let rh = [];
    if (results.rightHandLandmarks) {
        rh = nj.array(results.rightHandLandmarks.map(res => [res.x, res.y, res.z])).flatten().tolist();
    } else {
        rh = nj.zeros(21 * 3).tolist();
    }
   

    return nj.concatenate([face,pose, lh, rh]);
}

  function onResults(results){
 
    var actions = ['como estas','gusto en verte','hola','tanto tiempo'];

    const videoWidth = webcamRef.current.video.videoWidth;
    const videoHeight = webcamRef.current.video.videoHeight;

    // Set canvas width
    canvasRef.current.width = videoWidth;
    canvasRef.current.height = videoHeight;

    const canvasElement = canvasRef.current;
    const canvasCtx = canvasElement.getContext("2d");

    canvasCtx.save();
    canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
    canvasCtx.drawImage(results.image, 0, 0,
                        canvasElement.width, canvasElement.height);
  
    canvasCtx.globalCompositeOperation = 'source-over';
    connect(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS,
                   {color: '#00FF00', lineWidth: 4});
    connect(canvasCtx, results.poseLandmarks,
                  {color: '#FF0000', lineWidth: 2});
    connect(canvasCtx, results.faceLandmarks,FACEMESH_TESSELATION,
                   {color: '#C0C0C070', lineWidth: 1});
    connect(canvasCtx, results.leftHandLandmarks, HAND_CONNECTIONS,
                   {color: '#CC0000', lineWidth: 5});
    connect(canvasCtx, results.leftHandLandmarks,
                  {color: '#00FF00', lineWidth: 2});
    connect(canvasCtx, results.rightHandLandmarks, HAND_CONNECTIONS,
                   {color: '#00CC00', lineWidth: 5});
    connect(canvasCtx, results.rightHandLandmarks,
                  {color: '#FF0000', lineWidth: 2});
    canvasCtx.restore();

    kp_sequence.push(extractKeyPoints(results));

    if (kp_sequence.length > 15 && thereHand(results)) {
      count_frame++;
    } else {
        if (count_frame >= 5) {
             const kpArray = nj.stack(kp_sequence.slice(-15)).reshape([1, 15, -1]);
             const res = model.predict(kpArray).tolist()[0];

            if (res.indexOf(Math.max(...res)) > 0.7) {
              console.log("prediciendo");
              const sent = actions[res.indexOf(Math.max(...res))];
              sentence.unshift(sent);
              [sentence, repe_sent] = formatSentences(sent, sentence, repe_sent);
              } 

            count_frame = 0;
            kp_sequence = [];
        }
    }

  }



  useEffect(()=>{
    const holistic = new Holistic({
      locateFile: (file) => {
      return `https://cdn.jsdelivr.net/npm/@mediapipe/holistic/${file}`;
    }
  })

  holistic.setOptions({
    modelComplexity: 0,
    smoothLandmarks: true,
    enableSegmentation: true,
    smoothSegmentation: true,
    refineFaceLandmarks: false,
    minDetectionConfidence: 0.5,
    minTrackingConfidence: 0.5
  })
holistic.onResults(onResults);

if(typeof webcamRef.current!=="undefined" && webcamRef.current!==null){
    camera = new cam.Camera(webcamRef.current.video,{
      onFrame:async()=>{
        await holistic.send({image:webcamRef.current.video})
      },
      width:640,
      height:480
    });
    camera.start() 
}
})
return (
    <div className="App">
        <Webcam 
        ref={webcamRef}
          style= {{
            position:'absolute',
            marginRight:'auto',
            marginLeft:'auto',
            left:0,
            right:0,
            textAlign:'center',
            zIndex:9,
            width:640,
            height:480,
          }}
        />
        <canvas 
        ref={canvasRef}
          style= {{
            position:'absolute',
            marginRight:'auto',
            marginLeft:'auto',
            left:0,
            right:0,
            textAlign:'center',
            zIndex:9,
            width:640,
            height:480,
          }}>

        </canvas>
    </div>
  );
}
export default App;
2
  • The error you're facing indicates that the input data you're passing to your LSTM model is not in the expected shape. The model expects an input shape of [null, 15, 1662], where null represents the batch size, 15 is the sequence length, and 1662 is the feature dimensionality. However, it seems like your input data may not be correctly shaped or initialized. Commented May 27, 2024 at 5:58
  • In your code, you're attempting to predict using the model inside the onResults function, which is asynchronous and might be causing issues due to the model not being fully loaded yet. You should ensure that the model is fully loaded before attempting to use it for predictions. Commented May 27, 2024 at 5:58

0

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.