Kaldi学习笔记 ivector-plda-scoring-dense.cc 解读

1 Usage

ivector-plda-scoring-dense [options] <plda> <reco2utt> <ivectors-rspecifier> <scores-wspecifier>

<plda> is the file from which the pre-trained PLDA model is read.
<reco2utt> maps each recording to the list of its utterances (segments); a spk2utt file can be used in its place.
<ivectors-rspecifier> is the rspecifier from which the iVectors are read.
<scores-wspecifier> is the wspecifier to which the score matrices are written.
The program performs PLDA scoring between all pairs of iVectors in a recording and outputs the resulting score matrix.

2 Code and Comments

    // Process one recording per iteration: collect the iVectors of its
    // segments, optionally reduce them with a PCA estimated on this recording,
    // and write a matrix of PLDA scores for every pair of segments.
    for (; !reco2utt_reader.Done(); reco2utt_reader.Next()) {
      // this_plda is a per-recording copy of plda; the PCA transform further
      // down is applied to this copy only.
      Plda this_plda(plda);
      std::string reco = reco2utt_reader.Key();

      // Ids of the utterances (segments) that belong to this recording.
      std::vector<std::string> uttlist = reco2utt_reader.Value();
      // iVectors of those utterances, appended in the same order as uttlist.
      std::vector<Vector<BaseFloat> > ivectors;

      // Look up the iVector of every utterance (segment) of the recording.
      for (size_t i = 0; i < uttlist.size(); i++) {
        std::string utt = uttlist[i];

        // Every listed utterance is expected to have an iVector; a missing
        // one is reported through KALDI_ERR.
        if (!ivector_reader.HasKey(utt)) {
          KALDI_ERR << "No iVector present in input for utterance " << utt;
        }

        // Fetch the current utterance's iVector and append it to the list.
        Vector<BaseFloat> ivector = ivector_reader.Value(utt);
        ivectors.push_back(ivector);
      }

      if (ivectors.size() == 0) {
        // Nothing to score: count the recording as an error and move on.
        KALDI_WARN << "Not producing output for recording " << reco
                   << " since no segments had iVectors";
        num_reco_err++;
      } else {
        // ivector_mat holds one iVector per row; its column count is taken
        // from the first iVector, so all iVectors are assumed to share that
        // dimension. scores is square, with one row/column per iVector.
        Matrix<BaseFloat> ivector_mat(ivectors.size(), ivectors[0].Dim()),
                          ivector_mat_pca,
                          ivector_mat_plda,
                          pca_transform,
                          scores(ivectors.size(), ivectors.size());

        // Copy all iVectors into the rows of a single matrix.
        for (size_t i = 0; i < ivectors.size(); i++) {
          ivector_mat.Row(i).CopyFromVec(ivectors[i]);
        }

        if (EstPca(ivector_mat, target_energy, reco, &pca_transform)) {
          // Apply the PCA transform to the raw i-vectors.
          ApplyPca(ivector_mat, pca_transform, &ivector_mat_pca);

          // Apply the PCA transform to the parameters of the PLDA model.
          this_plda.ApplyTransform(Matrix<double>(pca_transform));

          // Now transform the i-vectors using the reduced PLDA model.
          TransformIvectors(ivector_mat_pca, plda_config, this_plda,
                            &ivector_mat_plda);
        } else {
          // If EstPca returns false, we won't apply any PCA.
          TransformIvectors(ivector_mat, plda_config, this_plda,
                            &ivector_mat_plda);
        }

        // Score every ordered pair (i, j), including i == j, so the whole
        // square matrix is filled.
        // NOTE(review): the middle argument 1.0 is presumably the number of
        // enrollment examples -- confirm against Plda::LogLikelihoodRatio.
        for (int32 i = 0; i < ivector_mat_plda.NumRows(); i++) {
          for (int32 j = 0; j < ivector_mat_plda.NumRows(); j++) {
            scores(i, j) = this_plda.LogLikelihoodRatio(Vector<double>(
              ivector_mat_plda.Row(i)), 1.0,
              Vector<double>(ivector_mat_plda.Row(j)));
          }
        }
        // One score matrix is written per recording, keyed by its id.
        scores_writer.Write(reco, scores);
        num_reco_done++;
      }
    }

3 Summary

This file iterates over each recording and its corresponding segments (utterances). For each recording, it calculates the PLDA score for every pair of segments and outputs the scores as one matrix per recording.