mirror of https://gerrit.osmocom.org/gapk
WIP: add laforge-mmx.diff to illustrate MMX related changes I tried
This commit is contained in:
parent
1ea1d362fd
commit
12caf496aa
|
@ -0,0 +1,167 @@
|
|||
Only in refsrc: .downloaded
|
||||
Only in refsrc: .sp_frm.c.swp
|
||||
diff -u refsrc.orig/sp_frm.c refsrc/sp_frm.c
|
||||
--- refsrc.orig/sp_frm.c 2015-12-27 19:22:13.966296058 +0100
|
||||
+++ refsrc/sp_frm.c 2014-05-13 22:43:56.786205819 +0200
|
||||
@@ -60,6 +60,13 @@
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
+#include <stdio.h>
|
||||
+
|
||||
+#include <stdint.h>
|
||||
+//#define HAVE_MMX
|
||||
+extern int32_t mmx_mac_unsat(int16_t *_x, int16_t *_y);
|
||||
+extern int32_t mmx_mac_sat(int16_t *_x, int16_t *_y);
|
||||
+
|
||||
/*_________________________________________________________________________
|
||||
| |
|
||||
| Include Files |
|
||||
@@ -384,6 +391,7 @@
|
||||
/* get a vector */
|
||||
/*--------------*/
|
||||
|
||||
+ // 16, 32 or 64 iterations
|
||||
getNextVec(pswRc);
|
||||
|
||||
/* clear the limiter flag */
|
||||
@@ -432,6 +440,8 @@
|
||||
for (iCnt = 0; iCnt < quantList.iNum; iCnt++)
|
||||
{
|
||||
|
||||
+ // 4 * 16, 32 or 64 iterations
|
||||
+
|
||||
/* get a vector */
|
||||
/*--------------*/
|
||||
|
||||
@@ -931,6 +941,7 @@
|
||||
|
||||
for (i = 0; i <= bound; i++)
|
||||
{
|
||||
+ // 3-4 iterations
|
||||
L_sum = L_mac(L_ROUND, pswVOld[i], pswQntRc[j]);
|
||||
L_sum = L_mac(L_sum, pswVOld[-i], pswQntRc[j]);
|
||||
L_sum = L_mac(L_sum, pswPOld[i], pswQntRcSqd[j]);
|
||||
@@ -949,6 +960,7 @@
|
||||
|
||||
for (i = -bound; i < 0; i++)
|
||||
{
|
||||
+ // 3-4 iterations
|
||||
L_sum = L_msu(L_ROUND, pswVOld[i + 1], SW_MIN);
|
||||
L_sum = L_mac(L_sum, pswQntRcSqd[j], pswVOld[-i - 1]);
|
||||
L_sum = L_mac(L_sum, pswQntRc[j], pswPOld[-i - 1]);
|
||||
@@ -958,6 +970,7 @@
|
||||
|
||||
for (i = 0; i <= bound; i++)
|
||||
{
|
||||
+ // 3-4 iterations
|
||||
L_sum = L_msu(L_ROUND, pswVOld[i + 1], SW_MIN);
|
||||
L_sum = L_mac(L_sum, pswQntRcSqd[j], pswVOld[-i - 1]);
|
||||
L_sum = L_mac(L_sum, pswQntRc[j], pswPOld[i + 1]);
|
||||
@@ -4536,6 +4549,11 @@
|
||||
|
||||
pswScaledWSpeech = pswScaledWSpeechBuffer + LSMAX;
|
||||
|
||||
+#if 0
|
||||
+ printf("G_FRAME_LEN=%d\n", G_FRAME_LEN);
|
||||
+ printf("LSMIN=%d\n", LSMIN);
|
||||
+ printf("LSMAX=%d\n", LSMAX);
|
||||
+#endif
|
||||
/*_________________________________________________________________________
|
||||
| |
|
||||
| Executable Code |
|
||||
@@ -4633,11 +4651,20 @@
|
||||
/*---------------------------------------------------------------------*/
|
||||
|
||||
L_G = 0;
|
||||
+#ifndef HAVE_MMX
|
||||
+ // 40 iterations (MMX: 5)
|
||||
for (i = -LSMAX; i < -LSMAX + S_LEN; i++)
|
||||
L_G = L_mac(L_G, pswScaledWSpeech[i], pswScaledWSpeech[i]);
|
||||
+#else
|
||||
+ for (i = -LSMAX; i < -LSMAX + S_LEN; i+=8)
|
||||
+ L_G += mmx_mac_unsat(&pswScaledWSpeech[i], &pswScaledWSpeech[i]);
|
||||
+#endif
|
||||
|
||||
pswGFrame[G_FRAME_LEN - 1] = extract_h(L_G);
|
||||
|
||||
+//#ifndef HAVE_MMX
|
||||
+#if 1
|
||||
+ // 248 iterations (MMX: 31)
|
||||
for (i = -LSMAX; i < G_FRAME_LEN - LSMAX - 1; i++)
|
||||
{
|
||||
|
||||
@@ -4646,6 +4673,13 @@
|
||||
pswScaledWSpeech[i + S_LEN]);
|
||||
pswGFrame[G_FRAME_LEN - LSMAX - 2 - i] = extract_h(L_G);
|
||||
}
|
||||
+#else
|
||||
+ for (i = -LSMAX; i < G_FRAME_LEN - LSMAX - 1; i+= 8) {
|
||||
+ L_G -= mmx_mac_sat(&pswScaledWSpeech[i], &pswScaledWSpeech[i]);
|
||||
+ L_G += mmx_mac_sat(&pswScaledWSpeech[i + S_LEN],
|
||||
+ &pswScaledWSpeech[i + S_LEN]);
|
||||
+ }
|
||||
+#endif
|
||||
|
||||
ppswGSfrm[0] = pswGFrame + 3 * S_LEN;
|
||||
ppswGSfrm[1] = pswGFrame + 2 * S_LEN;
|
||||
@@ -4661,8 +4695,14 @@
|
||||
pswSfrmEng[2] = pswGFrame[G_FRAME_LEN - 1 - LSMAX - 2 * S_LEN];
|
||||
|
||||
L_WSfrmEng = 0;
|
||||
+#ifndef HAVE_MMX
|
||||
+ // 40 iterations (MMX: 5)
|
||||
for (i = F_LEN - S_LEN; i < F_LEN; i++)
|
||||
L_WSfrmEng = L_mac(L_WSfrmEng, pswScaledWSpeech[i], pswScaledWSpeech[i]);
|
||||
+#else
|
||||
+ for (i = F_LEN - S_LEN; i < F_LEN; i+= 8)
|
||||
+ L_WSfrmEng += mmx_mac_unsat(&pswScaledWSpeech[i], &pswScaledWSpeech[i]);
|
||||
+#endif
|
||||
|
||||
pswSfrmEng[3] = extract_h(L_WSfrmEng);
|
||||
|
||||
@@ -4671,19 +4711,26 @@
|
||||
/* as in the G buffer.) */
|
||||
/*------------------------------------------------------------*/
|
||||
|
||||
+ // 4 iterations
|
||||
for (i = 0; i < N_SUB; i++)
|
||||
{
|
||||
|
||||
+ // 127 iterations
|
||||
for (j = LSMIN; j <= LSMAX; j++)
|
||||
{
|
||||
-
|
||||
L_C = 0;
|
||||
+#ifndef HAVE_MMX
|
||||
+ // 4*127*40 iterations (MMX: 4*127*5)
|
||||
for (k = 0; k < S_LEN; k++)
|
||||
{
|
||||
-
|
||||
L_C = L_mac(L_C, pswScaledWSpeech[i * S_LEN + k],
|
||||
pswScaledWSpeech[i * S_LEN - j + k]);
|
||||
}
|
||||
+#else
|
||||
+ for (k = 0; k < S_LEN; k+= 8)
|
||||
+ L_C += mmx_mac_unsat(&pswScaledWSpeech[i*S_LEN + k],
|
||||
+ &pswScaledWSpeech[i*S_LEN - j + k]);
|
||||
+#endif
|
||||
|
||||
pswCFrame[i * CG_TERMS + j - LSMIN] = extract_h(L_C);
|
||||
}
|
||||
@@ -4750,6 +4797,7 @@
|
||||
|
||||
L_Voicing = 0;
|
||||
for (i = 0; i < N_SUB; i++)
|
||||
+ // 4 iterations
|
||||
L_Voicing = L_mac(L_Voicing, pswSfrmEng[i], UV_SCALE0);
|
||||
|
||||
L_Voicing = L_add(L_Voicing, L_deposit_h(swBestPG));
|
||||
@@ -5085,6 +5133,7 @@
|
||||
siLowestSoFar = 2;
|
||||
for (i = 0; i < N_SUB; i++)
|
||||
{
|
||||
+ // 4 iterations
|
||||
|
||||
/* Check this subframe against highest voicing threshold */
|
||||
/*-------------------------------------------------------*/
|
Loading…
Reference in New Issue