File: Internal\Synthesis\PcmConverter.cs
Web Access
Project: src\src\runtime\src\libraries\System.Speech\src\System.Speech.csproj (System.Speech)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics.CodeAnalysis;

namespace System.Speech.Internal.Synthesis
{
    internal class PcmConverter
    {
        #region Internal Methods

        /// <summary>
        /// Validates the input and output wave formats and decides whether any conversion
        /// is required between them.
        /// </summary>
        /// <remarks>
        /// Both formats must have a positive sample rate and either one or two channels.
        /// When the sample rates differ, a resampling filter is built for the pair of rates.
        /// The formats are only remembered by this instance when a conversion is required.
        /// </remarks>
        /// <param name="inWavFormat">Format of the samples that will be supplied.</param>
        /// <param name="outWavFormat">Format the samples must be converted to.</param>
        /// <returns>
        /// <c>false</c> when sample rate, codec type and channel count are all identical
        /// (nothing to convert); otherwise <c>true</c>.
        /// </returns>
        /// <exception cref="FormatException">
        /// A sample rate or channel count is out of range, or the codec type of either
        /// format is reported as a negative value by <c>AudioFormatConverter.TypeOf</c>.
        /// </exception>
        internal bool PrepareConverter(ref WAVEFORMATEX inWavFormat, ref WAVEFORMATEX outWavFormat)
        {
            // Only mono or stereo streams with a positive sample rate are handled.
            bool inputSupported = inWavFormat.nSamplesPerSec > 0 && inWavFormat.nChannels > 0 && inWavFormat.nChannels <= 2;
            bool outputSupported = outWavFormat.nSamplesPerSec > 0 && outWavFormat.nChannels > 0 && outWavFormat.nChannels <= 2;
            if (!inputSupported || !outputSupported)
            {
                throw new FormatException();
            }

            _iInFormatType = AudioFormatConverter.TypeOf(inWavFormat);
            _iOutFormatType = AudioFormatConverter.TypeOf(outWavFormat);
            if (_iInFormatType < 0 || _iOutFormatType < 0)
            {
                throw new FormatException();
            }

            bool sameRate = outWavFormat.nSamplesPerSec == inWavFormat.nSamplesPerSec;
            bool sameCodec = _iOutFormatType == _iInFormatType;
            bool sameChannels = outWavFormat.nChannels == inWavFormat.nChannels;

            // Identical formats: the caller can pass the data through untouched.
            if (sameRate && sameCodec && sameChannels)
            {
                return false;
            }

            // A rate change needs a freshly built filter and cleared history buffers.
            if (!sameRate)
            {
                CreateResamplingFilter(inWavFormat.nSamplesPerSec, outWavFormat.nSamplesPerSec);
            }

            // Remember both formats for the subsequent ConvertSamples calls.
            _inWavFormat = inWavFormat;
            _outWavFormat = outWavFormat;
            return true;
        }

        /// <summary>
        /// Converts one chunk of audio from the input format to the output format that were
        /// registered through <see cref="PrepareConverter"/>.
        /// </summary>
        /// <remarks>
        /// The chunk is first converted to 16-bit PCM, then processed according to the
        /// channel layout:
        ///   1. stereo to mono:   down-mix to mono, then resample
        ///   2. mono to stereo:   resample, then duplicate into both channels
        ///   3. stereo to stereo: split channels, resample each, interleave again
        ///                        (passed through unchanged when the rates are equal)
        ///   4. mono to mono:     resample
        /// Finally the 16-bit PCM result is converted to the output codec. After the call the
        /// chunk status is set to <c>Block.Middle</c>.
        /// </remarks>
        /// <param name="pvInSamples">Raw bytes of the chunk in the input format.</param>
        /// <returns>Raw bytes of the converted chunk in the output format.</returns>
        internal byte[] ConvertSamples(byte[] pvInSamples)
        {
            // Everything below operates on 16-bit PCM samples.
            short[] pcm16 = AudioFormatConverter.Convert(pvInSamples, _iInFormatType, AudioCodec.PCM16);

            int sourceChannels = _inWavFormat.nChannels;
            int targetChannels = _outWavFormat.nChannels;
            short[] converted;

            if (sourceChannels == 2 && targetChannels == 1)
            {
                // case 1: down-mix first so only one channel has to be resampled
                short[] mono = Stereo2Mono(pcm16);
                converted = Resample(_inWavFormat, _outWavFormat, mono, _leftMemory);
            }
            else if (sourceChannels == 1 && targetChannels == 2)
            {
                // case 2: resample the single channel, then duplicate it
                short[] resampled = Resample(_inWavFormat, _outWavFormat, pcm16, _leftMemory);
                converted = Mono2Stereo(resampled);
            }
            else if (sourceChannels == 2 && targetChannels == 2)
            {
                // case 3: each channel keeps its own filter history
                if (_inWavFormat.nSamplesPerSec == _outWavFormat.nSamplesPerSec)
                {
                    converted = pcm16;
                }
                else
                {
                    SplitStereo(pcm16, out short[] left, out short[] right);
                    short[] resampledLeft = Resample(_inWavFormat, _outWavFormat, left, _leftMemory);
                    short[] resampledRight = Resample(_inWavFormat, _outWavFormat, right, _rightMemory);
                    converted = MergeStereo(resampledLeft, resampledRight);
                }
            }
            else if (sourceChannels == 1 && targetChannels == 1)
            {
                // case 4
                converted = Resample(_inWavFormat, _outWavFormat, pcm16, _leftMemory);
            }
            else
            {
                System.Diagnostics.Debug.Fail("Invalid wave format");
                converted = null!;
            }

            // Every chunk after this one continues the stream.
            _eChunkStatus = Block.Middle;

            // Back from 16-bit PCM to the requested output codec.
            return AudioFormatConverter.Convert(converted, AudioCodec.PCM16, _iOutFormatType);
        }

        #endregion

        #region Private Methods

        /// <summary>
        /// Resamples a single channel of 16-bit samples when the two formats have
        /// different sample rates.
        /// </summary>
        /// <param name="inWavFormat">Format supplying the source sample rate.</param>
        /// <param name="outWavFormat">Format supplying the target sample rate.</param>
        /// <param name="pnBuff">Samples of one channel.</param>
        /// <param name="memory">Filter history buffer belonging to this channel.</param>
        /// <returns>
        /// <paramref name="pnBuff"/> itself when the rates are equal; otherwise a new array
        /// holding the resampled data.
        /// </returns>
        private short[] Resample(WAVEFORMATEX inWavFormat, WAVEFORMATEX outWavFormat, short[] pnBuff, float[] memory)
        {
            // Equal rates: nothing to do, hand the buffer back untouched.
            if (inWavFormat.nSamplesPerSec == outWavFormat.nSamplesPerSec)
            {
                return pnBuff;
            }

            // The filter works on floats; convert, filter, and convert back.
            float[] resampled = Resampling(Short2Float(pnBuff), memory);
            return Float2Short(resampled);
        }

        /// <summary>
        /// Copies 16-bit samples into a new float array of the same length.
        /// </summary>
        /// <param name="inSamples">Samples to widen.</param>
        /// <returns>A new array where each element has the value of the matching input sample.</returns>
        private static float[] Short2Float(short[] inSamples)
        {
            float[] widened = new float[inSamples.Length];
            int position = 0;

            foreach (short sample in inSamples)
            {
                widened[position++] = sample;
            }

            return widened;
        }

        /// <summary>
        /// Converts float samples to 16-bit samples, rounding half away from zero and
        /// clamping to the range of <see cref="short"/>.
        /// </summary>
        /// <param name="inSamples">Samples to narrow.</param>
        /// <returns>A new array of the same length holding the rounded, clamped samples.</returns>
        private static short[] Float2Short(float[] inSamples)
        {
            short[] narrowed = new short[inSamples.Length];

            for (int index = 0; index < narrowed.Length; index++)
            {
                float sample = inSamples[index];
                float rounded;

                if (sample >= 0)
                {
                    // Adding one half before truncation rounds positive values to nearest.
                    rounded = sample + 0.5f;
                    rounded = rounded > short.MaxValue ? short.MaxValue : rounded;
                }
                else
                {
                    // Subtracting one half does the same for negative values.
                    rounded = sample - 0.5f;
                    rounded = rounded < short.MinValue ? short.MinValue : rounded;
                }

                narrowed[index] = (short)rounded;
            }

            return narrowed;
        }

        /// <summary>
        /// Builds an interleaved stereo buffer by writing every mono sample to both channels.
        /// </summary>
        /// <param name="inSamples">Mono samples.</param>
        /// <returns>A new array twice as long, with each input sample appearing twice in a row.</returns>
        private static short[] Mono2Stereo(short[] inSamples)
        {
            short[] stereo = new short[inSamples.Length * 2];

            // Output positions 2f and 2f + 1 both carry input sample f.
            for (int slot = 0; slot < stereo.Length; slot++)
            {
                stereo[slot] = inSamples[slot / 2];
            }

            return stereo;
        }

        /// <summary>
        /// Down-mixes an interleaved stereo buffer to mono by averaging each pair of
        /// consecutive samples.
        /// </summary>
        /// <param name="inSamples">Interleaved stereo samples.</param>
        /// <returns>A new array half as long, holding the integer average of each pair.</returns>
        private static short[] Stereo2Mono(short[] inSamples)
        {
            short[] mono = new short[inSamples.Length / 2];
            int frame = 0;

            for (int source = 0; source < inSamples.Length; source += 2)
            {
                // The sum is computed as int, so it cannot overflow before the division.
                int pairSum = inSamples[source] + inSamples[source + 1];
                mono[frame++] = unchecked((short)(pairSum / 2));
            }

            return mono;
        }

        /// <summary>
        /// Interleaves two single-channel buffers into one stereo buffer.
        /// </summary>
        /// <param name="leftSamples">Samples written to the even output positions; determines the frame count.</param>
        /// <param name="rightSamples">Samples written to the odd output positions.</param>
        /// <returns>A new array of twice the length of <paramref name="leftSamples"/>.</returns>
        private static short[] MergeStereo(short[] leftSamples, short[] rightSamples)
        {
            short[] interleaved = new short[leftSamples.Length * 2];
            int write = 0;

            for (int frame = 0; frame < leftSamples.Length; frame++)
            {
                interleaved[write++] = leftSamples[frame];
                interleaved[write++] = rightSamples[frame];
            }

            return interleaved;
        }

        /// <summary>
        /// Splits an interleaved stereo buffer into two single-channel buffers.
        /// </summary>
        /// <param name="inSamples">Interleaved samples; even positions go to the left output, odd positions to the right output.</param>
        /// <param name="leftSamples">Receives the samples from the even positions.</param>
        /// <param name="rightSamples">Receives the samples from the odd positions.</param>
        private static void SplitStereo(short[] inSamples, out short[] leftSamples, out short[] rightSamples)
        {
            int length = inSamples.Length / 2;

            leftSamples = new short[length];
            rightSamples = new short[length];

            // i walks the interleaved input one frame (two samples) at a time, while k is
            // the frame index in the outputs. k must advance together with i; otherwise
            // every frame is written to index 0 and the rest of both channels stays silent.
            for (int i = 0, k = 0; i < inSamples.Length; i += 2, k++)
            {
                leftSamples[k] = inSamples[i];
                rightSamples[k] = inSamples[i + 1];
            }
        }

        /// <summary>
        /// Builds the low-pass filter and the per-channel history buffers used to convert
        /// between the two sample rates, and marks the next chunk as the first of a stream.
        /// </summary>
        /// <param name="inHz">Source sample rate; must be positive.</param>
        /// <param name="outHz">Target sample rate; must be positive.</param>
        /// <exception cref="ArgumentOutOfRangeException">Either rate is zero or negative.</exception>
        [MemberNotNull(nameof(_filterCoeff), nameof(_leftMemory), nameof(_rightMemory))]
        private void CreateResamplingFilter(int inHz, int outHz)
        {
            if (inHz <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(inHz));
            }

            if (outHz <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(outHz));
            }

            // Reduce the rate pair to the up/down factors stored on this instance.
            FindResampleFactors(inHz, outHz);

            // The larger of the two factors determines both filter length and cut-off.
            int largerFactor = Math.Max(_iUpFactor, _iDownFactor);

            _iFilterHalf = (int)(inHz * largerFactor * _dHalfFilterLen);
            _iFilterLen = 2 * _iFilterHalf + 1;

            _filterCoeff = WindowedLowPass(0.5f / largerFactor, _iUpFactor);

            // Number of input samples the filter spans; one history buffer per channel.
            _iBuffLen = (int)(_iFilterLen / (float)_iUpFactor);
            _leftMemory = new float[_iBuffLen];
            _rightMemory = new float[_iBuffLen];

            // The next chunk passed to the resampler starts a new stream.
            _eChunkStatus = Block.First;
        }

        /// <summary>
        /// Designs a symmetric low-pass filter of <c>_iFilterLen</c> taps using the
        /// windowing method with a Blackman window.
        /// </summary>
        /// <param name="dCutOff">Cut-off in normalized frequency; expected to lie strictly between 0 and 0.5.</param>
        /// <param name="dGain">Gain applied to every coefficient.</param>
        /// <returns>The filter coefficients, centred on index <c>_iFilterHalf</c>.</returns>
        private float[] WindowedLowPass(float dCutOff, float dGain)
        {
            System.Diagnostics.Debug.Assert(dCutOff > 0.0 && dCutOff < 0.5);

            float[] window = Blackman(_iFilterLen, true);
            float[] coefficients = new float[_iFilterLen];

            double angularCutOff = 2.0f * Math.PI * dCutOff;

            // Centre tap: the limit of the sinc term as the offset goes to zero.
            coefficients[_iFilterHalf] = (float)(dGain * 2.0 * dCutOff);

            // The filter is symmetric, so each windowed sinc value fills two taps.
            for (int offset = 1; offset <= _iFilterHalf; offset++)
            {
                double tap = dGain * Math.Sin(angularCutOff * offset) / (Math.PI * offset) * window[_iFilterHalf - offset];
                coefficients[_iFilterHalf + offset] = (float)tap;
                coefficients[_iFilterHalf - offset] = (float)tap;
            }

            return coefficients;
        }

        /// <summary>
        /// Reduces the ratio of the two sample rates by the primes listed in
        /// <c>s_piPrimes</c> and stores the result as the up and down sampling factors.
        /// </summary>
        /// <param name="inHz">Source sample rate; its reduced value becomes <c>_iDownFactor</c>.</param>
        /// <param name="outHz">Target sample rate; its reduced value becomes <c>_iUpFactor</c>.</param>
        private void FindResampleFactors(int inHz, int outHz)
        {
            foreach (int prime in s_piPrimes)
            {
                // Strip this prime for as long as both rates still share it.
                while (inHz % prime == 0 && outHz % prime == 0)
                {
                    inHz /= prime;
                    outHz /= prime;
                }
            }

            _iUpFactor = outHz;
            _iDownFactor = inHz;
        }

        /// <summary>
        /// Resamples one channel of one chunk with the filter in <c>_filterCoeff</c>, using
        /// <paramref name="pdMemory"/> as the source for samples that precede the chunk.
        /// </summary>
        /// <param name="inSamples">Samples of the current chunk.</param>
        /// <param name="pdMemory">
        /// History buffer of <c>_iBuffLen</c> samples for this channel. It is read for input
        /// positions before the start of the chunk and rewritten at the end of the call
        /// unless the chunk status is <c>Block.Last</c>.
        /// </param>
        /// <returns>A new array with the resampled samples; its length depends on the chunk status.</returns>
        private float[] Resampling(float[] inSamples, float[] pdMemory)
        {
            int cInSamples = inSamples.Length;
            int cOutSamples;
            int iPhase;
            int j;
            int n;
            int iAddHalf;

            // The number of output samples and the filter offset (in units of
            // _iFilterHalf) depend on where the chunk sits in the stream.
            if (_eChunkStatus == Block.First)
            {
                // The first chunk yields _iFilterHalf / _iDownFactor fewer samples.
                cOutSamples = (cInSamples * _iUpFactor - _iFilterHalf) / _iDownFactor;
                iAddHalf = 1;
            }
            else if (_eChunkStatus == Block.Middle)
            {
                cOutSamples = (cInSamples * _iUpFactor) / _iDownFactor;
                iAddHalf = 2;
            }
            else
            {
                System.Diagnostics.Debug.Assert(_eChunkStatus == Block.Last);
                // For the last chunk the output count no longer depends on the input length.
                cOutSamples = (_iFilterHalf * _iUpFactor) / _iDownFactor;
                iAddHalf = 2;
            }

            // A first chunk shorter than the filter delay gives a negative count.
            if (cOutSamples < 0)
            {
                cOutSamples = 0;
            }
            float[] outSamples = new float[cOutSamples];

            for (int i = 0; i < cOutSamples; i++)
            {
                double dAcum = 0.0;

                // n: index of the first input sample contributing to output i (negative
                // values refer to the history buffer). Integer division truncates toward zero.
                n = ((i * _iDownFactor - iAddHalf * _iFilterHalf) / _iUpFactor);
                // iPhase: remainder of that division; selects which coefficients are used.
                iPhase = (i * _iDownFactor) - (n * _iUpFactor + iAddHalf * _iFilterHalf);

                for (j = 0; j < _iFilterLen / _iUpFactor; j++)
                {
                    if (_iUpFactor * j > iPhase)
                    {
                        if (n + j >= 0 && n + j < cInSamples)
                        {
                            // Sample lies inside the current chunk.
                            dAcum += inSamples[n + j] * _filterCoeff[_iUpFactor * j - iPhase];
                        }
                        else if (n + j < 0)
                        {
                            // Sample lies before the chunk: take it from the history buffer,
                            // addressed backwards from its end.
                            dAcum += pdMemory[_iBuffLen + n + j] * _filterCoeff[_iUpFactor * j - iPhase];
                        }
                        // Positions at or past the end of the chunk contribute nothing.
                    }
                }

                outSamples[i] = (float)dAcum;
            }

            //--- store samples into the history buffer for the next call
            if (_eChunkStatus != Block.Last)
            {
                // NOTE(review): the copy starts at cInSamples - (_iBuffLen + 1), so the last
                // index copied is cInSamples - 2 and the final input sample is not stored.
                // Confirm whether this offset is intentional.
                n = cInSamples - (_iBuffLen + 1);
                for (int i = 0; i < _iBuffLen; i++)
                {
                    if (n >= 0)
                    {
                        pdMemory[i] = inSamples[n++];
                    }
                    else
                    {
                        // Chunk shorter than the buffer: pad the leading entries with zeros.
                        n++;
                        pdMemory[i] = 0.0f;
                    }
                }
            }

            return outSamples;
        }

        /// <summary>
        /// Computes a Blackman window, <c>0.42 - 0.5 cos(a i) + 0.08 cos(2 a i)</c>, of the
        /// requested length.
        /// </summary>
        /// <param name="iLength">Number of window values to produce.</param>
        /// <param name="bSymmetric">
        /// When <c>true</c> the angle step <c>a</c> is 2*pi / (iLength - 1); otherwise it is
        /// 2*pi / iLength.
        /// </param>
        /// <returns>A new array of <paramref name="iLength"/> window values.</returns>
        private static float[] Blackman(int iLength, bool bSymmetric)
        {
            float[] window = new float[iLength];

            int divisor = bSymmetric ? iLength - 1 : iLength;
            double step = 2.0 * Math.PI / (float)divisor;
            double doubleStep = 2.0 * step;

            for (int index = 0; index < window.Length; index++)
            {
                window[index] = (float)(0.42 - (0.5 * Math.Cos(step * index)) + (0.08 * Math.Cos(doubleStep * index)));
            }

            return window;
        }

        #endregion

        #region private Fields

        // Position of the chunk being resampled within the stream. CreateResamplingFilter
        // sets First and ConvertSamples sets Middle after every chunk; Last is handled by
        // Resampling but is not assigned anywhere in this class.
        private enum Block
        {
            First,
            Middle,
            Last
        };

        // Formats captured by PrepareConverter; only stored when a conversion is required.
        private WAVEFORMATEX _inWavFormat;
        private WAVEFORMATEX _outWavFormat;
        // Codec types reported by AudioFormatConverter.TypeOf for the two formats.
        private AudioCodec _iInFormatType;
        private AudioCodec _iOutFormatType;

        // Chunk position used by Resampling to size its output.
        private Block _eChunkStatus;
        // Output rate after reduction by FindResampleFactors.
        private int _iUpFactor;
        // Number of filter taps on each side of the centre tap.
        private int _iFilterHalf;
        // Input rate after reduction by FindResampleFactors.
        private int _iDownFactor;
        // Total number of filter taps: 2 * _iFilterHalf + 1.
        private int _iFilterLen;
        // Length of each history buffer: (int)(_iFilterLen / (float)_iUpFactor).
        private int _iBuffLen;

        // Arrays are initialized if resampling is necessary
        private float[] _filterCoeff = null!;
        // History buffers: _leftMemory serves mono and the left channel, _rightMemory the right channel.
        private float[] _leftMemory = null!;
        private float[] _rightMemory = null!;

        // Scale applied to inHz * max(up, down) to get _iFilterHalf.
        // NOTE(review): presumably a duration in seconds (0.5 ms) — confirm.
        private const float _dHalfFilterLen = 0.0005f;

        // Primes tried as common divisors of the two sample rates. A common prime factor
        // larger than 37 is not removed.
        private static readonly int[] s_piPrimes = new int[] { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37 };

        #endregion
    }
}