From 1224279babf6447f3d5855511bf21cd04ae5e8c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Romain=20Bron=C3=A8s?= <>
Date: Fri, 28 Apr 2023 14:42:56 +0200
Subject: [PATCH] Multiple fixes for simulations

* Also change the package constant declarations and add a few. All
  math is now done outside of the package.
 hdl/corr_pi.vhd                   |  62 ++++---
 hdl/data_serializer.vhd           |  39 +++--
 hdl/matrix_mul.vhd                |  44 ++---
 hdl/orbit_error.vhd               |  13 +-
 hdl/pkg_corr_matrixpi.vhd         |  38 +++--
 hdl/pkg_corr_matrixpi_version.vhd |  29 ----
 hdl/top_corr_matrixpi.vhd         |  50 +++---
 rdl/corr_matrixpi.rdl             |   9 +-
 sim/TestCtrl_e.vhd                |  49 ++++++
 sim/tb_corr_matrixpi.vhd          | 257 ++++++++++++++++++++++++++++++
 sim/tc_basic.vhd                  | 142 +++++++++++++++++
 tcl/main.tcl                      |  12 ++
 12 files changed, 608 insertions(+), 136 deletions(-)
 delete mode 100644 hdl/pkg_corr_matrixpi_version.vhd
 create mode 100644 sim/TestCtrl_e.vhd
 create mode 100644 sim/tb_corr_matrixpi.vhd
 create mode 100644 sim/tc_basic.vhd

diff --git a/hdl/corr_pi.vhd b/hdl/corr_pi.vhd
index 8be92e3..02266f0 100644
--- a/hdl/corr_pi.vhd
+++ b/hdl/corr_pi.vhd
@@ -3,7 +3,7 @@ use ieee.std_logic_1164.all;
 use ieee.numeric_std.all;
 library desy;
-use desy.math_signed;
+use desy.math_signed.all;
 use work.pkg_corr_matrixpi.all;
@@ -13,21 +13,21 @@ entity corr_pi is
         rst_n           : in std_logic;
         -- matmult input
-        matmult         : in signed_array(0 to C_N_MM_PSC-1)(C_W_MM_ACCU-1 downto 0);
+        matmult         : in signed_array(0 to C_N_MM_PSC-1)(C_W_MM-1 downto 0);
         matmult_valid   : in std_logic;
-        matmult_seq     : in std_logic_vector(C_W_SEQ-1 downto 0);
+        matmult_seq     : in std_logic_vector(C_W_BPMSEQ-1 downto 0);
         -- Corr coefs
-        corr_kp         : signed(C_W_COR_KP-1 downto 0);
-        corr_ki         : signed(C_W_COR_KI-1 downto 0);
+        corr_kp         : std_logic_vector(C_W_COR_KP-1 downto 0);
+        corr_ki         : std_logic_vector(C_W_COR_KI-1 downto 0);
         reset_accu      : in std_logic;
         enable_corr     : in std_logic;
         -- Corr output
         corrout_valid   : out std_logic;
-        corrout_seq     : out std_logic_vector(C_W_SEQ-1 downto 0);
-        corrout         : out signed_array(0 to C_N_MM_PSC-1)(C_W_COR_OUT-1 downto 0)
+        corrout_seq     : out std_logic_vector(C_W_BPMSEQ-1 downto 0);
+        corrout         : out signed_array(0 to C_N_MM_PSC-1)(C_W_COR-1 downto 0)
 end entity corr_pi;
@@ -40,7 +40,7 @@ architecture rtl of corr_pi is
     signal r_valid     : std_logic_vector(2 downto 0);
-    signal r_seq      : arr_slv(0 downto 2)(C_W_SEQ-1 downto 0);
+    signal r_seq      : arr_slv(0 to 2)(C_W_BPMSEQ-1 downto 0);
@@ -57,7 +57,7 @@ begin
         elsif rising_edge(clk) then
             r_valid <= r_valid(r_valid'left-1 downto 0) & matmult_valid;
-            r_seq(0)        <= matmul_seq;
+            r_seq(0)        <= matmult_seq;
             for I in 1 to r_seq'right loop
                 r_seq(I) <= r_seq(I-1);
             end loop;
@@ -76,14 +76,14 @@ begin
     G_CORR:for I in 0 to C_N_MM_PSC-1 generate
-        signal r_matmult    : signed(C_W_MM_ACCU-1 downto 0);
-        signal accu_pre     : signed(C_W_MM_ACCU-1 downto 0);
+        signal r_matmult    : signed(C_W_MM-1 downto 0);
+        signal accu_pre     : signed(C_W_MM-1 downto 0);
         signal accu_post    : signed(C_W_COR_ACCU-1 downto 0);
-        signal ki_mult      : signed(C_W_COR_KI+C_W_COR_ACCU-1 downto 0);
-        signal kp_mult      : signed(C_W_COR_KP+C_W_COR_ACCU-1 downto 0);
-        signal ki_mult_rnd  : signed(ki_mult'left-C_N_COR_KIRND downto 0);
-        signal corr_sum     : signed(ki_mult_rnd'left downto 0);
-        signal corr_sum_rnd : signed(C_W_COR_OUT-1 downto 0);
+        signal ki_mult      : signed(C_W_COR_MI-1 downto 0);
+        signal kp_mult      : signed(C_W_COR_MP-1 downto 0);
+        signal ki_mult_rnd  : signed(C_W_COR_MI-C_N_COR_MIRND-1 downto 0);
+        signal corr_sum     : signed(C_W_COR_SUM-1 downto 0);
+        signal corr_sum_rnd : signed(C_W_COR_SUM-C_N_COR_RND-1 downto 0);
@@ -104,11 +104,11 @@ begin
                 if matmult_valid = '1' then
                     accu_pre  <= matmult(I);
                 end if;
-                if rst_accu = '1' then
+                if reset_accu = '1' then
                     accu_post   <= (others => '0');
-                    if ena_accu = '1' then
-                        accu_post <= f_sum_sat(f_sum_sat(accu_post, matmult), accu_pre);
+                    if enable_corr = '1' then
+                        accu_post <= f_sum_sat(f_sum_sat(accu_post, matmult(I)), accu_pre);
                     end if;
                 end if;
@@ -118,13 +118,13 @@ begin
                 -- COEFF MULTIPLIER --
-                ki_mult <= accu_post * corr_ki;
-                kp_mult <= r_matmult * corr_kp;
+                ki_mult <= resize(accu_post * signed('0'&corr_ki), C_W_COR_MI); -- the '0' pad makes the product 1 bit wider than ki_mult; the gain is non-negative so the value always fits and resize is lossless
+                kp_mult <= f_resize_sat(r_matmult * signed('0'&corr_kp), C_W_COR_MP); -- NOTE(review): product is C_W_MM+C_W_COR_KP+1 bits, wider than kp_mult; saturate rather than fail on a length mismatch -- confirm C_W_COR_MP
-                --------------------------
-                -- FINAL ADDER REGISTER --
-                --------------------------
-                corrout(I) <= f_resize_sat(ki_mult_rnd, C_W_COR_OUT);
+                ------------------------
+                -- FINAL MAP REGISTER --
+                ------------------------
+                corrout(I) <= f_resize_sat(corr_sum_rnd, C_W_COR);
             end if;
@@ -133,8 +133,9 @@ begin
         -- KI MULT ROUNDING --
-        ki_mult_rnd <= ki_mult(ki_mult'left downto C_N_COR_KIRND) when ki_mult(C_N_COR_KIRND-1) = '1' else
-                       f_sum_sat(ki_mult(ki_mult'left downto C_N_COR_KIRND), to_signed(1,1));
+        ki_mult_rnd <= f_resize_lsb(ki_mult, C_W_COR_MI-C_N_COR_MIRND);
+        --ki_mult_rnd <= ki_mult(C_W_COR_MI-1 downto C_N_COR_MIRND) when ki_mult(C_N_COR_MIRND-1) = '1' else
+        --               f_sum_sat(ki_mult(C_W_COR_MI-1 downto C_N_COR_KIRND), to_signed(1,1));
@@ -145,15 +146,12 @@ begin
         -- FINAL ROUNDING --
-        corr_sum_rnd    <= corr_sum(corr_sum'left downto C_N_COR_RND) when corr_sum(C_N_COR_RND-1) = '1' else
-                           f_sum_sat(corr_sum(corr_sum'left downto C_N_COR_RND), to_signed(1,1));
+        corr_sum_rnd    <= f_sum_sat(corr_sum(C_W_COR_SUM-1 downto C_N_COR_RND), to_signed(1,2)) when corr_sum(C_N_COR_RND-1) = '1' else -- round half up: add +1 only when the first dropped bit is set; to_signed(1,1) is the 1-bit pattern "1", i.e. -1
+                           corr_sum(C_W_COR_SUM-1 downto C_N_COR_RND);
     end generate;
 end architecture;
diff --git a/hdl/data_serializer.vhd b/hdl/data_serializer.vhd
index cfd6251..91b80f1 100644
--- a/hdl/data_serializer.vhd
+++ b/hdl/data_serializer.vhd
@@ -2,6 +2,13 @@ library ieee;
 use ieee.std_logic_1164.all;
 use ieee.numeric_std.all;
+library desy;
+use desy.ram_tdp;
+library desyrdl;
+use desyrdl.pkg_corr_matrixpi.t_mem_PSCIDTABLE_out;
+use desyrdl.pkg_corr_matrixpi.t_mem_PSCIDTABLE_in;
 use work.pkg_corr_matrixpi.all;
 entity data_serializer is
@@ -18,12 +25,12 @@ entity data_serializer is
         -- Corr parallel input
         corrout_valid   : in std_logic;
-        corrout_seq     : in std_logic_vector(C_W_SEQ-1 downto 0);
-        corrout         : in signed_array(0 to C_N_MM_PSC-1)(C_W_COR_OUT-1 downto 0);
+        corrout_seq     : in std_logic_vector(C_W_BPMSEQ-1 downto 0);
+        corrout         : in signed_array(0 to C_N_MM_PSC-1)(C_W_COR-1 downto 0);
         -- AXIS serial output
-        m_axis_tdata    : out std_logic_vector(C_W_COR_OUT-1 downto 0);
-        m_axis_tuser    : out std_logic_vector(C_W_SEQ-1 downto 0);
+        m_axis_tdata    : out std_logic_vector(C_W_COR+C_W_PSCID-1 downto 0);
+        m_axis_tuser    : out std_logic_vector(C_W_BPMSEQ-1 downto 0);
         m_axis_tvalid   : out std_logic;
         m_axis_tready   : in std_logic
@@ -35,11 +42,13 @@ architecture rtl of data_serializer is
-    signal cnt              : unsigned(pscif_table_i.addr'length-1 downto 0);
+    signal cnt              : unsigned(pscid_table_i.addr'length-1 downto 0);
     signal run_serial       : std_logic;
-    signal r_corr           : signed_array(0 to C_N_MM_PSC-1)(C_W_COR_OUT-1 downto 0);
-    signal r_seq            : std_logic_vector(C_W_SEQ-1 downto 0);
+    signal pscid            : std_logic_vector(C_W_PSCID-1 downto 0);
+    signal r_corr           : signed_array(0 to C_N_MM_PSC-1)(C_W_COR-1 downto 0);
+    signal r_seq            : std_logic_vector(C_W_BPMSEQ-1 downto 0);
@@ -109,16 +118,16 @@ begin
     -- Port A is read write from AXI controller, Port B is read only from logic
     inst_refx_table: entity desy.ram_tdp
     generic map(
-        G_ADDR      => pscif_table_i.addr'length,
-        G_DATA      => C_W_PSDID
+        G_ADDR      => pscid_table_i.addr'length,
+        G_DATA      => C_W_PSCID
     port map(
         pi_clk_a    => clk,
-        pi_en_a     => pscif_table_i.en,
-        pi_we_a     => pscif_table_i.we,
-        pi_addr_a   => pscif_table_i.addr,
-        pi_data_a   =>,
-        po_data_a   =>,
+        pi_en_a     => pscid_table_i.en,
+        pi_we_a     => pscid_table_i.we,
+        pi_addr_a   => pscid_table_i.addr,
+        pi_data_a   =>,
+        po_data_a   =>,
         pi_clk_b    => clk,
         pi_en_b     => '1',
         pi_we_b     => '0',
@@ -130,7 +139,7 @@ begin
-    m_axis_tdata    <= pscid & r_corr(0);
+    m_axis_tdata    <= pscid & std_logic_vector(r_corr(0));
     m_axis_tuser    <= r_seq;
     m_axis_tvalid   <= run_serial;
diff --git a/hdl/matrix_mul.vhd b/hdl/matrix_mul.vhd
index a86bfd8..0f8a665 100644
--- a/hdl/matrix_mul.vhd
+++ b/hdl/matrix_mul.vhd
@@ -4,11 +4,11 @@ use ieee.numeric_std.all;
 library desy;
 use desy.ram_tdp;
-use desy.math_signed;
+use desy.math_signed.all;
 library desyrdl;
-use desyrdl.pkg_corr_matrixpi.t_mem_MATRIXCOEF_out;
-use desyrdl.pkg_corr_matrixpi.t_mem_MATRIXCOEF_in;
+use desyrdl.pkg_corr_matrixpi.t_mem_MATRIXCOEF_2d_out;
+use desyrdl.pkg_corr_matrixpi.t_mem_MATRIXCOEF_2d_in;
 use work.pkg_corr_matrixpi.all;
@@ -18,19 +18,19 @@ entity matrix_mul is
         rst_n                  : in std_logic;
         -- Coef table, desyrdl
-        mm_coef_i              : in t_mem_MATRIXCOEF_out;
-        mm_coef_o              : out t_mem_MATRIXCOEF_in;
+        mm_coef_i              : in t_mem_MATRIXCOEF_2d_out;
+        mm_coef_o              : out t_mem_MATRIXCOEF_2d_in;
         id_cnt_load            : in std_logic_vector(C_W_MM_IDCNT-1 downto 0);
         -- Position data in
-        pos_x                  : in signed(C_W_BPMPOS-1 downto 0);
-        pos_y                  : in signed(C_W_BPMPOS-1 downto 0);
+        pos_x                  : in signed(C_W_OE-1 downto 0);
+        pos_y                  : in signed(C_W_OE-1 downto 0);
         pos_id                 : in std_logic_vector(C_W_BPMID-1 downto 0);
         pos_seq                : in std_logic_vector(C_W_BPMSEQ-1 downto 0);
         pos_tvalid             : in std_logic;
         -- Data out
-        matmult                : out signed_array(0 to C_N_MM_PSC-1)(C_W_MM_ACCU-1 downto 0);
+        matmult                : out signed_array(0 to C_N_MM_PSC-1)(C_W_MM-1 downto 0);
         matmult_tvalid         : out std_logic;
         matmult_seq            : out std_logic_vector(C_W_BPMSEQ-1 downto 0)
@@ -44,10 +44,10 @@ architecture rtl of matrix_mul is
     -- delay registers
-    signal r_pos_x    : signed(C_W_BPMPOS-1 downto 0);
-    signal r_pos_y    : signed(C_W_BPMPOS-1 downto 0);
-    signal r_seq      : arr_slv(0 downto 2)(C_W_BPMSEQ-1 downto 0);
-    signal r_tvalid   : std_logic_vector(2 downto 0);
+    signal r_pos_x    : signed(pos_x'left downto 0);
+    signal r_pos_y    : signed(pos_y'left downto 0);
+    signal r_seq      : arr_slv(0 to 3)(C_W_BPMSEQ-1 downto 0);
+    signal r_tvalid   : std_logic_vector(3 downto 0);
     -- Accumulators general control
@@ -151,7 +151,7 @@ begin
             po_data_b   => table_coefx
-        inst_coefx_table: entity desy.ram_tdp
+        inst_coefy_table: entity desy.ram_tdp
         generic map(
             G_ADDR  => C_W_MM_IDCNT,
             G_DATA  => C_W_MM_COEF
@@ -196,16 +196,24 @@ begin
         end process;
-        -- MAP RESULT
-        matmult(2*I)    <= accu_x;
-        matmult(2*I+1)  <= accu_y;
+        p_rndsat:process(clk, rst_n)
+        begin
+            if rst_n = '0' then
+                matmult(2*I)    <= (others => '0');
+                matmult(2*I+1)  <= (others => '0');
+            elsif rising_edge(clk) then
+                matmult(2*I)    <= f_resize_sat(f_resize_lsb(accu_x, C_W_MM_ACCU-C_N_MM_RND), C_W_MM);
+                matmult(2*I+1)  <= f_resize_sat(f_resize_lsb(accu_y, C_W_MM_ACCU-C_N_MM_RND), C_W_MM);
+            end if;
+        end process;
     end generate;
-    matmult_tvalid   <= r_tvalid(2);
-    matmult_seq     <= r_seq(2);
+    matmult_tvalid   <= r_tvalid(3);
+    matmult_seq     <= r_seq(3);
 end architecture;
diff --git a/hdl/orbit_error.vhd b/hdl/orbit_error.vhd
index db4383e..6d82b6d 100644
--- a/hdl/orbit_error.vhd
+++ b/hdl/orbit_error.vhd
@@ -3,7 +3,10 @@ use ieee.std_logic_1164.all;
 use ieee.numeric_std.all;
 library desyrdl;
-use desyrdl.pkg_corr_matrixpi.all;
+use desyrdl.pkg_corr_matrixpi.t_mem_REFORBITX_out;
+use desyrdl.pkg_corr_matrixpi.t_mem_REFORBITX_in;
+use desyrdl.pkg_corr_matrixpi.t_mem_REFORBITY_out;
+use desyrdl.pkg_corr_matrixpi.t_mem_REFORBITY_in;
 library desy;
 use desy.ram_tdp;
@@ -30,8 +33,8 @@ entity orbit_error is
         bpm_tvalid             : in std_logic;
         -- Orbit error output
-        errbpm_x               : out signed(C_W_BPMPOS-1 downto 0);
-        errbpm_y               : out signed(C_W_BPMPOS-1 downto 0);
+        errbpm_x               : out signed(C_W_OE-1 downto 0);
+        errbpm_y               : out signed(C_W_OE-1 downto 0);
         errbpm_id              : out std_logic_vector(C_W_BPMID-1 downto 0);
         errbpm_seq             : out std_logic_vector(C_W_BPMSEQ-1 downto 0);
         errbpm_tvalid          : out std_logic
@@ -113,8 +116,8 @@ begin
             errbpm_x    <= (others => '0');
             errbpm_y    <= (others => '0');
         elsif rising_edge(clk) then
-            errbpm_x    <= f_diff_sat(r_bpm_x, signed(table_refx));
-            errbpm_y    <= f_diff_sat(r_bpm_y, signed(table_refy));
+            errbpm_x    <= f_resize_sat(f_diff_sat(r_bpm_x, signed(table_refx)), C_W_OE);
+            errbpm_y    <= f_resize_sat(f_diff_sat(r_bpm_y, signed(table_refy)), C_W_OE);
         end if;
     end process;
diff --git a/hdl/pkg_corr_matrixpi.vhd b/hdl/pkg_corr_matrixpi.vhd
index 387bbba..44388e6 100644
--- a/hdl/pkg_corr_matrixpi.vhd
+++ b/hdl/pkg_corr_matrixpi.vhd
@@ -12,27 +12,45 @@ package pkg_corr_matrixpi is
-    -- Data input width
+    -- Data input
     constant C_W_BPMPOS         : natural := 32;
     constant C_W_BPMID          : natural := 8;
     constant C_W_BPMSEQ         : natural := 8;
-    constant C_W_PSCID          : natural := 16;
+    constant C_W_PSCID          : natural := 8;
+    -- Orbit error
+    constant C_N_OE_SAT         : natural := 8;
+    constant C_W_OE             : natural := 24; -- C_W_BPMPOS-C_N_OE_SAT;
     -- Matrix multiplier
     constant C_N_MM_BPM         : natural := 122; -- number of bpm (matrix columns)
     constant C_N_MM_PSC         : natural := 100; -- number of psc (matrix lines)
-    constant C_W_MM_COEF        : natural := 32;
-    constant C_W_MM_MULT        : natural := C_W_BPMPOS+C_W_MM_COEF;
-    constant C_W_MM_IDCNT       : natural := natural(ceil(log2(real(C_N_MM_BPM))));
-    constant C_W_MM_ACCU        : natural := C_W_MM_MULT+C_W_MM_IDCNT;
+    constant C_W_MM_COEF        : natural := 18;
+    constant C_W_MM_MULT        : natural := 42; -- C_W_OE+C_W_MM_COEF;
+    constant C_W_MM_IDCNT       : natural := 7;  -- natural(ceil(log2(real(C_N_MM_BPM))));
+    constant C_W_MM_ACCU        : natural := 49; -- C_W_MM_MULT+C_W_MM_IDCNT;
+    constant C_N_MM_SAT         : natural := 0;
+    constant C_N_MM_RND         : natural := 17;
+    constant C_W_MM             : natural := 32; --C_W_MM_ACCU-C_N_MM_SAT-C_N_MM_RND
     -- PI corrector
-    constant C_W_COR_KP         : natural := 32;
-    constant C_W_COR_KI         : natural := 32;
-    constant C_W_COR_ACCU       : natural := C_W_MM_ACCU;
+    constant C_W_COR_KP         : natural := 18;
+    constant C_W_COR_KI         : natural := 18;
+    constant C_W_COR_ACCU       : natural := 40; --C_W_MM+...;
+    constant C_W_COR_MP         : natural := 40; -- NOTE(review): C_W_MM+C_W_COR_KP evaluates to 32+18 = 50 with the values in this package, not 40 -- confirm intended width
+    constant C_W_COR_MI         : natural := 58; --C_W_COR_ACCU+C_W_COR_KI
+    constant C_N_COR_MIRND      : natural := 18;
+    constant C_W_COR_SUM        : natural := 40; -- max(C_W_COR_MP,C_W_COR_MI-C_N_COR_MIRND)
-    constant C_W_COR_OUT        : natural := 16;
+    constant C_N_COR_SAT        : natural := 0;
+    constant C_N_COR_RND        : natural := 24;
+    constant C_W_COR            : natural := 16; -- C_W_COR_SUM-C_N_COR_SAT-C_N_COR_RND
diff --git a/hdl/pkg_corr_matrixpi_version.vhd b/hdl/pkg_corr_matrixpi_version.vhd
deleted file mode 100644
index 76384c4..0000000
--- a/hdl/pkg_corr_matrixpi_version.vhd
+++ /dev/null
@@ -1,29 +0,0 @@
---          ____  _____________  __                                          --
---         / __ \/ ____/ ___/\ \/ /                 _   _   _                --
---        / / / / __/  \__ \  \  /                 / \ / \ / \               --
---       / /_/ / /___ ___/ /  / /               = ( M | S | K )=             --
---      /_____/_____//____/  /_/                   \_/ \_/ \_/               --
---                                                                           --
--- Copyright (c) 2020 DESY
---! @brief   template for the version package for a particular module
---! @created 2020-01-30
---! Description:
---! This template is used by fwk to inject Version and Timestamp information
---! in to the module's register map
-library ieee;
-use ieee.std_logic_1164.all;
-package pkg_corr_matrixpi_version is
-  constant C_VERSION     : std_logic_vector(31 downto 0) := x"00000000";
-  constant C_TIMESTAMP   : std_logic_vector(31 downto 0) := x"64495b71";
-end pkg_corr_matrixpi_version ;
diff --git a/hdl/top_corr_matrixpi.vhd b/hdl/top_corr_matrixpi.vhd
index f7035c2..029cdf5 100644
--- a/hdl/top_corr_matrixpi.vhd
+++ b/hdl/top_corr_matrixpi.vhd
@@ -10,23 +10,23 @@ use work.pkg_corr_matrixpi.all;
 entity top_corr_matrixpi is
-        clk     : in std_logic;
-        rst_n   : in std_logic;
+        clk            : in std_logic;
+        rst_n          : in std_logic;
         -- AXI-MM interface
-        s_axi_m2s       : in t_corr_matrixpi_m2s;
-        s_axi_s2m       : out t_corr_matrixpi_s2m;
+        s_axi_m2s      : in t_corr_matrixpi_m2s;
+        s_axi_s2m      : out t_corr_matrixpi_s2m;
         -- AXIS input
-        s_axis_tdata    : in std_logic_vector(2*C_W_BPMPOS+C_W_BPMID-1 downto 0);
-        s_axis_tuser    : in std_logic_vector(C_W_BPMSEQ-1 downto 0);
-        s_axis_tvalid   : in std_logic;
+        s_axis_tdata   : in std_logic_vector(2*C_W_BPMPOS+C_W_BPMID-1 downto 0);
+        s_axis_tuser   : in std_logic_vector(C_W_BPMSEQ-1 downto 0);
+        s_axis_tvalid  : in std_logic;
         -- AXIS output
-        m_axis_tdata    : out std_logic_vector(C_W_COR_OUT+C_W_PSCID-1 downto 0);
-        m_axis_tuser    : out std_logic_vector(C_W_BPMSEQ-1 downto 0);
-        m_axis_tvalid   : out std_logic;
-        m_axis_tready   : in std_logic
+        m_axis_tdata   : out std_logic_vector(C_W_COR+C_W_PSCID-1 downto 0);
+        m_axis_tuser   : out std_logic_vector(C_W_BPMSEQ-1 downto 0);
+        m_axis_tvalid  : out std_logic;
+        m_axis_tready  : in std_logic
 end entity;
@@ -38,8 +38,8 @@ architecture struct of top_corr_matrixpi is
     signal rst          : std_logic;
-    signal mm_a2l       : t_addrmap_corr_matrixpi_in;
-    signal mm_l2a       : t_addrmap_corr_matrixpi_out;
+    signal mm_l2a       : t_addrmap_corr_matrixpi_in;
+    signal mm_a2l       : t_addrmap_corr_matrixpi_out;
     -- unpacked input
     signal bpm_x        : signed(C_W_BPMPOS-1 downto 0);
@@ -48,21 +48,21 @@ architecture struct of top_corr_matrixpi is
     signal bpm_seq      : std_logic_vector(C_W_BPMSEQ-1 downto 0);
     -- Position error
-    signal errbpm_x               : signed(C_W_BPMPOS-1 downto 0);
-    signal errbpm_y               : signed(C_W_BPMPOS-1 downto 0);
+    signal errbpm_x               : signed(C_W_OE-1 downto 0);
+    signal errbpm_y               : signed(C_W_OE-1 downto 0);
     signal errbpm_id              : std_logic_vector(C_W_BPMID-1 downto 0);
     signal errbpm_seq             : std_logic_vector(C_W_BPMSEQ-1 downto 0);
     signal errbpm_tvalid          : std_logic;
     -- Matrix mutliplication result
-    signal matmult                : signed_array(0 to C_N_MM_PSC-1)(C_W_MM_ACCU-1 downto 0);
+    signal matmult                : signed_array(0 to C_N_MM_PSC-1)(C_W_MM-1 downto 0);
     signal matmult_tvalid         : std_logic;
     signal matmult_seq            : std_logic_vector(C_W_BPMSEQ-1 downto 0);
     -- Corrector result, parallel
     signal corrout_valid   : std_logic;
     signal corrout_seq     : std_logic_vector(C_W_BPMSEQ-1 downto 0);
-    signal corrout         : signed_array(0 to C_N_MM_PSC-1)(C_W_COR_OUT-1 downto 0);
+    signal corrout         : signed_array(0 to C_N_MM_PSC-1)(C_W_COR-1 downto 0);
     -- Serializer
     signal overrun_flag     : std_logic;
@@ -83,8 +83,8 @@ begin
         pi_s_top   => s_axi_m2s,
         po_s_top   => s_axi_s2m,
         -- to logic interface
-        pi_addrmap => mm_a2l,
-        po_addrmap => mm_l2a
+        pi_addrmap => mm_l2a,
+        po_addrmap => mm_a2l
@@ -136,7 +136,7 @@ begin
         -- Coef table, desyrdl
         mm_coef_i              => mm_a2l.MATRIXCOEF,
         mm_coef_o              => mm_l2a.MATRIXCOEF,
-        id_cnt_load            =>,
+        id_cnt_load            =>,
         -- Position data in
         pos_x                  => errbpm_x,
@@ -161,15 +161,15 @@ begin
         -- matmult input
         matmult         => matmult,
-        matmult_valid   => matmult_valid,
+        matmult_valid   => matmult_tvalid,
         matmult_seq     => matmult_seq,
         -- Corr coefs
-        corr_kp         =>,
-        corr_ki         =>,
+        corr_kp         =>,
+        corr_ki         =>,
-        reset_accu      => mm.a2l.CONTROL.RST_ACCU,
-        enable_corr     => mm.a2l.CONTROL.ENABLE,
+        reset_accu      =>,
+        enable_corr     =>,
         -- Corr output
         corrout_valid   => corrout_valid,
diff --git a/rdl/corr_matrixpi.rdl b/rdl/corr_matrixpi.rdl
index 0a17328..351efe4 100644
--- a/rdl/corr_matrixpi.rdl
+++ b/rdl/corr_matrixpi.rdl
@@ -43,15 +43,20 @@ addrmap corr_matrixpi {
         field {sw = rw; hw = r;} data[`C_W_COR_KI];
     } CORR_KI;
+    reg {
+        desc="Number of ID to count for matrix multiplication.";
+        field {sw = rw; hw = r;} data[`C_W_MM_IDCNT] = `C_N_MM_BPM-1;
+    } MM_ID_CNT;
     mem {
         desc = "X Reference orbit.";
-        memwidth = `C_W_BPMID;
+        memwidth = `C_W_BPMPOS;
         mementries = `C_N_MM_BPM;
     } external REFORBITX;
     mem {
         desc = "Y Reference orbit.";
-        memwidth = `C_W_BPMID;
+        memwidth = `C_W_BPMPOS;
         mementries = `C_N_MM_BPM;
     } external REFORBITY;
diff --git a/sim/TestCtrl_e.vhd b/sim/TestCtrl_e.vhd
new file mode 100644
index 0000000..9916b21
--- /dev/null
+++ b/sim/TestCtrl_e.vhd
@@ -0,0 +1,49 @@
+library ieee ;
+    use ieee.std_logic_1164.all ;
+    use ieee.numeric_std.all ;
+library OSVVM ;
+    context OSVVM.OsvvmContext ;
+    use osvvm.ScoreboardPkg_slv.all ;
+-- We need AXIS and AXI4L
+library osvvm_AXI4 ;
+    context osvvm_AXI4.AxiStreamContext ;
+    context osvvm_Axi4.Axi4LiteContext ;
+-- DESYRDL interface package
+library desyrdl;
+    use desyrdl.pkg_corr_matrixpi.all;
+use work.pkg_corr_matrixpi.all;
+entity TestCtrl is
+    port (
+        -- Global Signal Interface
+        Clk                 : In    std_logic ;
+        nReset              : In    std_logic ;
+        -- Transaction Interfaces
+        StreamTxRec         : inout StreamRecType;
+        StreamRxRec         : inout StreamRecType;
+        ManagerRec          : inout AddressBusRecType
+    ) ;
+    -- Derive AXI interface properties from the StreamTxRec
+    constant DATA_TX_WIDTH : integer := StreamTxRec.DataToModel'length ;
+    --constant DATA_TX_BYTES : integer := DATA_WIDTH/8 ;
+    -- Simplifying access to Burst FIFOs using aliases
+    --alias TxBurstFifo : ScoreboardIdType is StreamTxRec.BurstFifo ;
+    --alias RxBurstFifo : ScoreboardIdType is StreamRxRec.BurstFifo ;
+    constant AXI_ADDR_WIDTH : integer := ManagerRec.Address'length ;
+    constant AXI_DATA_WIDTH : integer := ManagerRec.DataToModel'length ;
+    constant OSVVM_RESULTS_DIR   : string := "" ;
+    constant OSVVM_PATH_TO_TESTS : string := "" ;
+end entity TestCtrl ;
diff --git a/sim/tb_corr_matrixpi.vhd b/sim/tb_corr_matrixpi.vhd
new file mode 100644
index 0000000..83dbbe0
--- /dev/null
+++ b/sim/tb_corr_matrixpi.vhd
@@ -0,0 +1,257 @@
+library ieee ;
+    use ieee.std_logic_1164.all ;
+    use ieee.numeric_std.all ;
+library osvvm ;
+    context osvvm.OsvvmContext ;
+library osvvm_AXI4 ;
+    context osvvm_AXI4.AxiStreamContext ;
+    context osvvm_AXI4.Axi4LiteContext ;
+library desyrdl;
+    use desyrdl.common.all;
+    use desyrdl.pkg_corr_matrixpi.all;
+use work.pkg_corr_matrixpi.all;
+entity tb_corr_matrixpi is
+end entity tb_corr_matrixpi;
+architecture TestHarness of tb_corr_matrixpi is
+    constant TPERIOD_CLK : time := 4 ns ;
+    constant TPD         : time := 1 ns ;
+    ------------------------
+    ------------------------
+    signal tb_clk       : std_logic;
+    signal tb_rst_n     : std_logic;
+    -- AXI-MM desyrdl
+    signal tb_s_axi_m2s       : t_corr_matrixpi_m2s;
+    signal tb_s_axi_s2m       : t_corr_matrixpi_s2m;
+    -- Address Bus Transaction Interface
+    signal ManagerRec: AddressBusRecType(
+      Address(C_ADDR_WIDTH-1 downto 0),
+      DataToModel(C_DATA_WIDTH-1 downto 0),
+      DataFromModel(C_DATA_WIDTH-1 downto 0)
+    ) ;
+    -- AXI Manager Functional Interface
+    signal   AxiBus : Axi4LiteRecType(
+        WriteAddress( Addr (C_ADDR_WIDTH-1 downto 0) ),
+        WriteData   ( Data (C_DATA_WIDTH-1 downto 0),   Strb(C_DATA_WIDTH/8-1 downto 0) ),
+        ReadAddress ( Addr (C_ADDR_WIDTH-1 downto 0) ),
+        ReadData    ( Data (C_DATA_WIDTH-1 downto 0) )
+    ) ;
+    -- AXI Stream transmitter Functional Interface
+    signal tb_axis_tx_tvalid    : std_logic ;
+    signal tb_axis_tx_tready    : std_logic ;
+    signal tb_axis_tx_tdata     : std_logic_vector(2*C_W_BPMPOS+C_W_BPMID-1 downto 0) ;
+    signal tb_axis_tx_tid       : std_logic_vector(0 downto 0) ;
+    signal tb_axis_tx_tdest     : std_logic_vector(0 downto 0) ;
+    signal tb_axis_tx_tuser     : std_logic_vector(C_W_BPMSEQ-1 downto 0) ;
+    signal tb_axis_tx_tstrb     : std_logic_vector(tb_axis_tx_tdata'length/8-1 downto 0) ;
+    signal tb_axis_tx_tkeep     : std_logic_vector(tb_axis_tx_tdata'length/8-1 downto 0) ;
+    signal tb_axis_tx_tlast     : std_logic ;
+    -- AXI Stream receiver Functional Interface (byte strobes sized from the rx data bus, not the tx one)
+    signal tb_axis_rx_tvalid    : std_logic ;
+    signal tb_axis_rx_tready    : std_logic ;
+    signal tb_axis_rx_tdata     : std_logic_vector(C_W_COR+C_W_PSCID-1 downto 0) ;
+    signal tb_axis_rx_tid       : std_logic_vector(0 downto 0) ;
+    signal tb_axis_rx_tdest     : std_logic_vector(0 downto 0) ;
+    signal tb_axis_rx_tuser     : std_logic_vector(C_W_BPMSEQ-1 downto 0) ;
+    signal tb_axis_rx_tstrb     : std_logic_vector(tb_axis_rx_tdata'length/8-1 downto 0) ;
+    signal tb_axis_rx_tkeep     : std_logic_vector(tb_axis_rx_tdata'length/8-1 downto 0) ;
+    signal tb_axis_rx_tlast     : std_logic ;
+    -- Stream Transaction Interface: param width = tid + tdest + tuser + 1 (tlast), per direction
+    constant C_W_AXIS_RX_PARAM : natural := tb_axis_rx_tid'length + tb_axis_rx_tdest'length + tb_axis_rx_tuser'length +1;
+    constant C_W_AXIS_TX_PARAM : natural := tb_axis_tx_tid'length + tb_axis_tx_tdest'length + tb_axis_tx_tuser'length +1;
+    signal StreamTxRec, StreamRxRec : StreamRecType(
+        DataToModel   (tb_axis_tx_tdata'left downto 0),
+        DataFromModel (tb_axis_rx_tdata'left downto 0),
+        ParamToModel  (C_W_AXIS_TX_PARAM-1 downto 0),
+        ParamFromModel(C_W_AXIS_RX_PARAM-1 downto 0)
+    ) ;
+    ---------------------------
+    ---------------------------
+    component TestCtrl is
+    port (
+        -- Global Signal Interface
+        Clk             : In    std_logic ;
+        nReset          : In    std_logic ;
+        -- Transaction Interfaces
+        StreamTxRec   : inout StreamRecType;
+        StreamRxRec   : inout StreamRecType;
+        ManagerRec    : inout AddressBusRecType
+    );
+    end component TestCtrl ;
+    ---------------------
+    -- CLOCK AND RESET --
+    ---------------------
+    Osvvm.TbUtilPkg.CreateClock (
+        Clk        => tb_Clk,
+        Period     => Tperiod_Clk
+    );
+    Osvvm.TbUtilPkg.CreateReset (
+        Reset       => tb_rst_n,
+        ResetActive => '0',
+        Clk         => tb_clk,
+        Period      => 7 * tperiod_Clk,
+        tpd         => tpd
+    );
+    -----------------------
+    -----------------------
+    dut: entity work.top_corr_matrixpi
+    port map(
+        clk            => tb_clk,
+        rst_n          => tb_rst_n,
+        -- AXI-MM interface
+        s_axi_m2s      => tb_s_axi_m2s,
+        s_axi_s2m      => tb_s_axi_s2m,
+        -- AXIS input
+        s_axis_tdata   => tb_axis_tx_tdata,
+        s_axis_tuser   => tb_axis_tx_tuser,
+        s_axis_tvalid  => tb_axis_tx_tvalid,
+        -- AXIS output
+        m_axis_tdata   => tb_axis_rx_tdata,
+        m_axis_tuser   => tb_axis_rx_tuser,
+        m_axis_tvalid  => tb_axis_rx_tvalid,
+        m_axis_tready  => tb_axis_rx_tready
+    );
+    ----------------------
+    ----------------------
+    tb_s_axi_m2s.awaddr(tb_s_axi_m2s.awaddr'left downto C_ADDR_WIDTH)  <= (others => '0');
+    tb_s_axi_m2s.awaddr(C_ADDR_WIDTH-1 downto 0)  <= axibus.writeaddress.addr;
+    tb_s_axi_m2s.awprot          <= axibus.writeaddress.prot;
+    tb_s_axi_m2s.awvalid         <= axibus.writeaddress.valid;
+    tb_s_axi_m2s.wdata           <= axibus.writedata.data; -- write payload from the OSVVM manager (Data field of the WriteData sub-record)
+    tb_s_axi_m2s.wstrb           <= axibus.writedata.strb;
+    tb_s_axi_m2s.wvalid          <= axibus.writedata.valid;
+    tb_s_axi_m2s.bready          <= axibus.writeresponse.ready;
+    tb_s_axi_m2s.araddr(tb_s_axi_m2s.araddr'left downto C_ADDR_WIDTH)  <= (others => '0');
+    tb_s_axi_m2s.araddr(C_ADDR_WIDTH-1 downto 0)          <= axibus.readaddress.addr;
+    tb_s_axi_m2s.arprot          <= axibus.readaddress.prot;
+    tb_s_axi_m2s.arvalid         <= axibus.readaddress.valid;
+    tb_s_axi_m2s.rready          <= axibus.readdata.ready;
+    axibus.writeaddress.ready    <= tb_s_axi_s2m.awready;
+    axibus.writedata.ready       <= tb_s_axi_s2m.wready;
+    axibus.writeresponse.resp    <= tb_s_axi_s2m.bresp;
+    axibus.writeresponse.valid   <= tb_s_axi_s2m.bvalid;
+    axibus.readaddress.ready     <= tb_s_axi_s2m.arready;
+    axibus.readdata.data         <= tb_s_axi_s2m.rdata; -- read payload back to the OSVVM manager (Data field of the ReadData sub-record)
+    axibus.readdata.resp         <= tb_s_axi_s2m.rresp;
+    axibus.readdata.valid        <= tb_s_axi_s2m.rvalid;
+    -----------------------------
+    -----------------------------
+    -- AXI-MM Verification Manager
+    vc_axi_manager : Axi4LiteManager
+    port map (
+        -- Globals
+        Clk         => tb_clk,
+        nReset      => tb_rst_n,
+        -- AXI Manager Functional Interface
+        AxiBus      => AxiBus,
+        -- Testbench Transaction Interface
+        TransRec    => ManagerRec
+    ) ;
+    -- Axi-Stream Verification Manager
+    vc_axis_transmitter : AxiStreamTransmitter
+    generic map (
+        tperiod_Clk    => tperiod_Clk,
+        tpd_Clk_TValid => tpd,
+        tpd_Clk_TID    => tpd,
+        tpd_Clk_TDest  => tpd,
+        tpd_Clk_TUser  => tpd,
+        tpd_Clk_TData  => tpd,
+        tpd_Clk_TStrb  => tpd,
+        tpd_Clk_TKeep  => tpd,
+        tpd_Clk_TLast  => tpd
+    )
+    port map (
+        -- Globals
+        Clk       => tb_clk,
+        nReset    => tb_rst_n,
+        -- AXI Stream Interface
+        TValid    => tb_axis_tx_tvalid,
+        TReady    => tb_axis_tx_tready,
+        TID       => tb_axis_tx_tid,
+        TDest     => tb_axis_tx_tdest,
+        TUser     => tb_axis_tx_tuser,
+        TData     => tb_axis_tx_tdata ,
+        TStrb     => tb_axis_tx_tstrb,
+        TKeep     => tb_axis_tx_tkeep,
+        TLast     => tb_axis_tx_tlast,
+        -- Testbench Transaction Interface
+        TransRec  => StreamTxRec
+    );
+    -- Axi-Stream Verification Manager
+    vc_axis_receiver : AxiStreamReceiver
+    generic map (
+        tperiod_Clk    => tperiod_Clk,
+        tpd_Clk_Tready => tpd
+    )
+    port map (
+        -- Globals
+        Clk       => tb_clk,
+        nReset    => tb_rst_n,
+        -- AXI Stream Interface
+        TValid    => tb_axis_rx_tvalid,
+        TReady    => tb_axis_rx_tready,
+        TID       => tb_axis_rx_tid,
+        TDest     => tb_axis_rx_tdest,
+        TUser     => tb_axis_rx_tuser,
+        TData     => tb_axis_rx_tdata ,
+        TStrb     => tb_axis_rx_tstrb,
+        TKeep     => tb_axis_rx_tkeep,
+        TLast     => tb_axis_rx_tlast,
+        -- Testbench Transaction Interface
+        TransRec  => StreamRxRec
+    );
+    ---------------
+    -- TEST CTRL --
+    ---------------
+    TestCtrl_1 : TestCtrl
+    port map (
+        -- Globals
+        Clk            => tb_clk,
+        nReset         => tb_rst_n,
+        -- Testbench Transaction Interfaces
+        StreamTxRec    => StreamTxRec,
+        StreamRxRec    => StreamRxRec,
+        ManagerRec     => ManagerRec
+    ) ;
+end architecture TestHarness;
diff --git a/sim/tc_basic.vhd b/sim/tc_basic.vhd
new file mode 100644
index 0000000..15ac586
--- /dev/null
+++ b/sim/tc_basic.vhd
@@ -0,0 +1,142 @@
+use work.pkg_corr_matrixpi_version.all;
+architecture basic of TestCtrl is
+    ------------------------
+    -- SIGNAL DECLARATION --
+    ------------------------
+    signal ConfigDone : integer_barrier := 1 ;
+    signal TestDone : integer_barrier := 1 ;
+    --------------------------
+    -- FUNCTION DECLARATION --
+    --------------------------
+    -- Address helper: encodes a natural as an unsigned AXI_ADDR_WIDTH-bit vector
+    -- so that register addresses can be written as plain integers in the test.
+    function f_addr(addr:natural) return std_logic_vector is
+        constant C_ADDR_BITS : unsigned(AXI_ADDR_WIDTH-1 downto 0) := to_unsigned(addr, AXI_ADDR_WIDTH);
+    begin
+        return std_logic_vector(C_ADDR_BITS);
+    end function f_addr;
+    -- Data helper: encodes an integer as a two's-complement AXI_DATA_WIDTH-bit vector
+    -- (negative values are therefore sign-extended to the full data width).
+    function f_sdata(data:integer) return std_logic_vector is
+        constant C_DATA_BITS : signed(AXI_DATA_WIDTH-1 downto 0) := to_signed(data, AXI_DATA_WIDTH);
+    begin
+        return std_logic_vector(C_DATA_BITS);
+    end function f_sdata;
+    -- Packet helper: builds one BPM word by concatenating, left to right,
+    --   the unsigned ID (C_W_BPMID bits), the signed X position and the signed
+    --   Y position (C_W_BPMPOS bits each).
+    function f_bpmpkt(id:natural; x:integer; y:integer) return std_logic_vector is
+        constant C_ID_FIELD : std_logic_vector(C_W_BPMID-1 downto 0)  := std_logic_vector(to_unsigned(id, C_W_BPMID));
+        constant C_X_FIELD  : std_logic_vector(C_W_BPMPOS-1 downto 0) := std_logic_vector(to_signed(x, C_W_BPMPOS));
+        constant C_Y_FIELD  : std_logic_vector(C_W_BPMPOS-1 downto 0) := std_logic_vector(to_signed(y, C_W_BPMPOS));
+    begin
+        -- Returned directly as a concatenation so the result keeps the same index range as before
+        return C_ID_FIELD & C_X_FIELD & C_Y_FIELD;
+    end function f_bpmpkt;
+-- End of the declarative part: the concurrent processes of the test case follow
+begin
+    ------------------------------------------------------------
+    -- ControlProc
+    --   Set up AlertLog and wait for end of test
+    ------------------------------------------------------------
+    ControlProc : process
+        -- Upper bound on the test duration; used both as the barrier timeout
+        -- and as the threshold for reporting that the timeout was hit.
+        constant C_TEST_TIMEOUT : time := 35 ms;
+    begin
+        -- Initialization of test
+        SetAlertLogName("BasicTest");
+        SetLogEnable(PASSED, TRUE);    -- Enable PASSED logs
+        SetLogEnable(INFO, TRUE);    -- Enable INFO logs
+        -- Wait for testbench initialization (two delta cycles)
+        wait for 0 ns ;  wait for 0 ns;
+        TranscriptOpen(OSVVM_RESULTS_DIR & "BasicTestTr.txt");
+        SetTranscriptMirror(TRUE);
+        -- Wait for Design Reset, then discard any alert raised before its release
+        wait until nReset = '1';
+        ClearAlerts;
+        -- Wait for test to finish: returns when every process has reached the
+        -- TestDone barrier, or when the timeout expires
+        WaitForBarrier(TestDone, C_TEST_TIMEOUT);
+        AlertIf(now >= C_TEST_TIMEOUT, "Test finished due to timeout");
+        -- A run without a single affirmation proves nothing, so flag it
+        AlertIf(GetAffirmCount < 1, "Test is not Self-Checking");
+        TranscriptClose;
+        EndOfTestReports;
+        -- End the simulation; the final wait is only reached if stop returns
+        std.env.stop;
+        wait;
+    end process ControlProc;
+    ------------------------------------------------------------
+    -- ManagerProc
+    --   Register accesses issued through the AxiManager:
+    --   version check, then DUT configuration
+    ------------------------------------------------------------
+    ManagerProc : process
+        -- Register offsets used by this test case
+        constant C_ADDR_VERSION    : natural := 0;
+        constant C_ADDR_CORR_COEF0 : natural := 12;
+        constant C_ADDR_CORR_COEF1 : natural := 16;
+        constant C_ADDR_ORBIT_REF0 : natural := 20;
+        constant C_REG_STRIDE      : natural := 4;
+        variable ReadWord : std_logic_vector(AXI_DATA_WIDTH-1 downto 0) ;
+    begin
+        wait until nReset = '1';
+        WaitForClock(ManagerRec, 2);
+        -- Version register must match the package constant
+        log("Read version", INFO) ;
+        Read(ManagerRec, f_addr(C_ADDR_VERSION), ReadWord) ;
+        AffirmIfEqual(ReadWord, C_VERSION, "Manager Read Data: ") ;
+        log("==--- Configure the DUT ---==", INFO);
+        log("+-- Global Config", INFO);
+        -- Correction coefficients
+        Write(ManagerRec, f_addr(C_ADDR_CORR_COEF0), f_sdata(2387));
+        Write(ManagerRec, f_addr(C_ADDR_CORR_COEF1), f_sdata(7353));
+        -- Zero every orbit reference, one register per BPM
+        for RefIdx in 0 to C_N_MM_BPM-1 loop
+            Write(ManagerRec, f_addr(C_ADDR_ORBIT_REF0 + C_REG_STRIDE*RefIdx), f_sdata(0));
+        end loop;
+        -- Configuration written: release the processes waiting on ConfigDone
+        WaitForBarrier(ConfigDone);
+        -- Global Enable (write currently disabled)
+        WaitForClock(ManagerRec, 10) ;
+        --Write(ManagerRec, std_logic_vector(C_REGISTER_INFO(C_CONFIG_ID).address), X"00000001") ;
+        -- Leave time for the outputs to propagate, then join the end-of-test barrier
+        WaitForClock(ManagerRec, 2000) ;
+        WaitForBarrier(TestDone) ;
+        wait ;
+    end process ManagerProc ;
+    ------------------------------------------------------------
+    -- TransmitterProc
+    --   Generate transactions for AxiTransmitter
+    ------------------------------------------------------------
+    TransmitterProc : process
+        -- Range of BPM identifiers sent, one packet per identifier
+        constant C_FIRST_BPM_ID : natural := 3;
+        constant C_LAST_BPM_ID  : natural := 124;
+    begin
+        wait until nReset = '1' ;
+        WaitForClock(StreamTxRec, 2) ;
+        -- Do not send anything before ManagerProc has written the configuration
+        WaitForBarrier(ConfigDone) ;
+        log("Sending bpm packets", INFO);
+        -- X and Y positions are linear functions of the BPM id, so each packet is distinct
+        for I in C_FIRST_BPM_ID to C_LAST_BPM_ID loop
+            Send(StreamTxRec, f_bpmpkt(I, -7874+333*I, 5679-1098*I));
+        end loop;
+        -- Wait for outputs to propagate and signal TestDone
+        WaitForClock(StreamTxRec, 2) ;
+        WaitForBarrier(TestDone) ;
+        wait ;
+    end process TransmitterProc ;
+end basic;
+-- Test-case selection: binds architecture "basic" to the TestCtrl_1 instance
+-- of the TestHarness architecture of tb_corr_matrixpi.
+configuration tc_basic of tb_corr_matrixpi is
+    for TestHarness
+        for TestCtrl_1 : TestCtrl
+            use entity work.TestCtrl(basic);
+        end for;
+    end for;
+end configuration tc_basic;
diff --git a/tcl/main.tcl b/tcl/main.tcl
index 9be85d7..82b67c9 100644
--- a/tcl/main.tcl
+++ b/tcl/main.tcl
@@ -25,7 +25,14 @@ proc setSources {} {
   lappend Sources {"../hdl/data_serializer.vhd" "VHDL 2008"}
   lappend Sources {"../hdl/top_corr_matrixpi.vhd" "VHDL 2008"}
   lappend Sources [list "${::fwfwk::LibPath}/desy_vhdl/hdl/memory/ram/ram_tdp.vhd" "VHDL 2008" "desy"]
+  lappend Sources [list "${::fwfwk::LibPath}/desy_vhdl/hdl/math/pkg_math_utils.vhd" "VHDL 2008" "desy"]
   lappend Sources [list "${::fwfwk::LibPath}/desy_vhdl/hdl/math/pkg_math_signed.vhd" "VHDL 2008" "desy"]
+  lappend Sources [list "${::fwfwk::LibPath}/desy_vhdl/hdl/common/pkg_common_logic_utils.vhd" "VHDL 2008" "desy"]
+  # Simulation sources
+  lappend Sources {"../sim/tb_corr_matrixpi.vhd"  "VHDL 2008" "" "simulation"}
+  lappend Sources {"../sim/TestCtrl_e.vhd"  "VHDL 2008" "" "simulation"}
+  lappend Sources {"../sim/tc_basic.vhd"  "VHDL 2008" "" "simulation"}
 # ==============================================================================
@@ -46,4 +53,9 @@ proc doOnBuild {} {
 # ==============================================================================
 proc setSim {} {
+    variable SimTop
+    set SimTop { \
+        tc_basic \
+    }