/*
    ChibiOS/RT - Copyright (C) 2006-2007 Giovanni Di Sirio.

    This file is part of ChibiOS/RT.

    ChibiOS/RT is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    ChibiOS/RT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <ch.h>

#include "test.h"

/**
 * @page test_benchmarks Kernel Benchmarks
 *
 * <h2>Description</h2>
 * This module implements a series of system benchmarks. The benchmarks are
 * useful as a stress test and as a reference when comparing ChibiOS/RT
 * with similar systems.
 *
 * <h2>Objective</h2>
 * The objective of this test module is to provide a performance index for
 * the most critical system subsystems. The performance numbers make it
 * possible to detect performance regressions between successive ChibiOS/RT
 * releases.
 *
 * <h2>Preconditions</h2>
 * None.
 *
 * <h2>Test Cases</h2>
 * - @subpage test_benchmarks_001
 * - @subpage test_benchmarks_002
 * - @subpage test_benchmarks_003
 * - @subpage test_benchmarks_004
 * - @subpage test_benchmarks_005
 * - @subpage test_benchmarks_006
 * - @subpage test_benchmarks_007
 * - @subpage test_benchmarks_008
 * - @subpage test_benchmarks_009
 * - @subpage test_benchmarks_010
 * - @subpage test_benchmarks_011
 * .
 * @file testbmk.c Kernel Benchmarks
 * @brief Kernel Benchmarks source file
 * @file testbmk.h
 * @brief Kernel Benchmarks header file
 */

/* Semaphore shared by the benchmarks in this file: bmk7_setup() initializes
   it to 0 (thread3 waits on it) and bmk10_setup() initializes it to 1. */
static Semaphore sem1;
#if CH_USE_MUTEXES
/* Mutex locked/unlocked by bmk11_execute(); initialized by bmk11_setup(). */
static Mutex mtx1;
#endif

/**
 * @brief Message server thread used by the messages benchmarks.
 * @details Waits for a message, releases it back using the received value
 *          as the reply, and repeats until a zero message is received.
 *
 * @param p unused
 * @return Always 0.
 */
static msg_t thread1(void *p) {

  (void)p;
  for (;;) {
    msg_t received = chMsgWait();

    chMsgRelease(received);
    /* A zero message is the stop request sent by msg_loop_test(). */
    if (!received)
      break;
  }
  return 0;
}

/**
 * @brief Sends messages to a server thread until the test timer expires.
 * @details After @p test_wait_tick() the test timer is started with the
 *          value 1000 (the benchmark descriptions in this file speak of one
 *          second of operations). The message 1 is then sent repeatedly
 *          until @p test_timer_done becomes true; finally a 0 message is
 *          sent, which makes @p thread1() leave its loop.
 * @note The counter is returned as @p uint32_t, the same type used for
 *       counting and by all the callers. Returning it as @p unsigned @p int
 *       would truncate the value on targets where that type is 16 bits wide.
 *
 * @param tp the message server thread
 * @return The number of messages sent while the timer was running.
 */
__attribute__((noinline))
static uint32_t msg_loop_test(Thread *tp) {
  uint32_t n = 0;

  test_wait_tick();
  test_start_timer(1000);
  do {
    (void)chMsgSend(tp, 1);
    n++;
#if defined(WIN32)
    ChkIntSources();
#endif
  } while (!test_timer_done);
  /* Zero message: asks the server thread to stop. */
  (void)chMsgSend(tp, 0);
  return n;
}

/**
 * @page test_benchmarks_001 Messages performance #1
 *
 * <h2>Description</h2>
 * A message server thread is created with a lower priority than the client
 * thread. The message throughput per second is measured and the result is
 * printed in the output log.
 */

/**
 * @brief Returns the name of benchmark #1.
 */
static char *bmk1_gettest(void) {
  char *title = "Benchmark, messages throughput";

  return title;
}

/**
 * @brief Benchmark #1 body.
 * @details Creates the @p thread1() message server one priority level below
 *          the value returned by @p chThdGetPriority(), runs the message
 *          loop and prints the message count and twice that value as the
 *          context switch count.
 */
static void bmk1_execute(void) {
  uint32_t msgs;

  threads[0] = chThdCreateStatic(wa[0], WA_SIZE, chThdGetPriority() - 1,
                                 thread1, NULL);
  msgs = msg_loop_test(threads[0]);
  chThdTerminate(threads[0]);
  test_wait_threads();

  test_print("--- Score : ");
  test_printn(msgs);
  test_print(" msgs/S, ");
  test_printn(msgs * 2);
  test_println(" ctxswc/S");
}

/* Descriptor of benchmark #1. Slots in order: name getter, the slot where
   testbmk7/testbmk10 place their *_setup function (unused here), a slot that
   is NULL in every descriptor of this file (its meaning is defined by
   struct testcase, not visible here), and the benchmark body. */
const struct testcase testbmk1 = {
  bmk1_gettest,
  NULL,
  NULL,
  bmk1_execute
};

/**
 * @page test_benchmarks_002 Messages performance #2
 *
 * <h2>Description</h2>
 * A message server thread is created with a higher priority than the client
 * thread. The message throughput per second is measured and the result is
 * printed in the output log.
 */

/**
 * @brief Returns the name of benchmark #2.
 */
static char *bmk2_gettest(void) {
  char *title = "Benchmark, messages, empty RL";

  return title;
}

/**
 * @brief Benchmark #2 body.
 * @details Same as benchmark #1 except that the @p thread1() message server
 *          is created one priority level above the value returned by
 *          @p chThdGetPriority().
 */
static void bmk2_execute(void) {
  uint32_t msgs;

  threads[0] = chThdCreateStatic(wa[0], WA_SIZE, chThdGetPriority() + 1,
                                 thread1, NULL);
  msgs = msg_loop_test(threads[0]);
  chThdTerminate(threads[0]);
  test_wait_threads();

  test_print("--- Score : ");
  test_printn(msgs);
  test_print(" msgs/S, ");
  test_printn(msgs * 2);
  test_println(" ctxswc/S");
}

/* Descriptor of benchmark #2: name getter, two unused (NULL) slots and the
   benchmark body. */
const struct testcase testbmk2 = {
  bmk2_gettest,
  NULL,
  NULL,
  bmk2_execute
};

static msg_t thread2(void *p) {

  return (msg_t)p;
}

/**
 * @page test_benchmarks_003 Messages performance #3
 *
 * <h2>Description</h2>
 * A message server thread is created with a higher priority than the client
 * thread and four lower priority threads crowd the ready list. The message
 * throughput per second is measured while the ready list is populated and
 * the result is printed in the output log.
 */

/**
 * @brief Returns the name of benchmark #3.
 */
static char *bmk3_gettest(void) {
  char *title = "Benchmark, messages, 4 in RL";

  return title;
}

/**
 * @brief Benchmark #3 body.
 * @details Creates the @p thread1() message server at priority +1 and four
 *          @p thread2() threads at priorities -2, -3, -4 and -5 relative to
 *          the value returned by @p chThdGetPriority(), then runs the
 *          message loop and prints the score.
 */
static void bmk3_execute(void) {
  uint32_t msgs;
  int i;

  threads[0] = chThdCreateStatic(wa[0], WA_SIZE, chThdGetPriority() + 1,
                                 thread1, NULL);
  /* Filler threads in slots 1..4, each one level lower than the previous. */
  for (i = 1; i <= 4; i++) {
    threads[i] = chThdCreateStatic(wa[i], WA_SIZE,
                                   chThdGetPriority() - (i + 1),
                                   thread2, NULL);
  }
  msgs = msg_loop_test(threads[0]);
  chThdTerminate(threads[0]);
  test_wait_threads();

  test_print("--- Score : ");
  test_printn(msgs);
  test_print(" msgs/S, ");
  test_printn(msgs * 2);
  test_println(" ctxswc/S");
}

/* Descriptor of benchmark #3: name getter, two unused (NULL) slots and the
   benchmark body. */
const struct testcase testbmk3 = {
  bmk3_gettest,
  NULL,
  NULL,
  bmk3_execute
};

/**
 * @page test_benchmarks_004 Context Switch performance
 *
 * <h2>Description</h2>
 * A thread is created that just performs a @p chSchGoSleepS() in a loop;
 * the thread is awakened as fast as possible by the tester thread.<br>
 * The Context Switch performance is calculated by measuring the number of
 * iterations after a second of continuous operations.
 */

/**
 * @brief Returns the name of benchmark #4.
 */
static char *bmk4_gettest(void) {
  char *title = "Benchmark, context switch";

  return title;
}

/**
 * @brief Sleeper thread used by the context switch benchmark.
 * @details Within a single lock/unlock section the thread goes to sleep in
 *          the @p PRSUSPENDED state and, each time it resumes, inspects its
 *          own @p p_rdymsg field. It keeps sleeping while that value is
 *          @p RDY_OK and exits on any other value.
 *
 * @param p unused
 * @return Always 0.
 */
msg_t thread4(void *p) {
  Thread *self = chThdSelf();

  (void)p;
  chSysLock();
  for (;;) {
    chSchGoSleepS(PRSUSPENDED);
    /* Any wakeup message other than RDY_OK ends the thread. */
    if (self->p_rdymsg != RDY_OK)
      break;
  }
  chSysUnlock();
  return 0;
}

/**
 * @brief Benchmark #4 body.
 * @details Creates @p thread4() at priority +1, then wakes it up four times
 *          per lock/unlock section until the test timer expires. A final
 *          wakeup with @p RDY_TIMEOUT makes @p thread4() leave its loop.
 *          The printed score is twice the number of wakeups.
 */
static void bmk4_execute(void) {
  Thread *sleeper;
  uint32_t wakeups = 0;

  threads[0] = chThdCreateStatic(wa[0], WA_SIZE, chThdGetPriority() + 1,
                                 thread4, NULL);
  sleeper = threads[0];
  test_wait_tick();
  test_start_timer(1000);
  for (;;) {
    chSysLock();
    chSchWakeupS(sleeper, RDY_OK);
    chSchWakeupS(sleeper, RDY_OK);
    chSchWakeupS(sleeper, RDY_OK);
    chSchWakeupS(sleeper, RDY_OK);
    chSysUnlock();
    wakeups += 4;
#if defined(WIN32)
    ChkIntSources();
#endif
    if (test_timer_done)
      break;
  }

  /* Stop request for the sleeper thread. */
  chSysLock();
  chSchWakeupS(sleeper, RDY_TIMEOUT);
  chSysUnlock();

  test_wait_threads();
  test_print("--- Score : ");
  test_printn(wakeups * 2);
  test_println(" ctxswc/S");
}

/* Descriptor of benchmark #4: name getter, two unused (NULL) slots and the
   benchmark body. */
const struct testcase testbmk4 = {
  bmk4_gettest,
  NULL,
  NULL,
  bmk4_execute
};

/**
 * @page test_benchmarks_005 Threads performance, full cycle
 *
 * <h2>Description</h2>
 * Threads are continuously created and terminated in a loop. A full
 * @p chThdCreateStatic() / @p chThdExit() / @p chThdWait() cycle is performed
 * in each iteration.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

/**
 * @brief Returns the name of benchmark #5.
 */
static char *bmk5_gettest(void) {
  char *title = "Benchmark, threads, full cycle";

  return title;
}

static void bmk5_execute(void) {

  uint32_t n = 0;
  void *wap = wa[0];
  tprio_t prio = chThdGetPriority() - 1;
  test_wait_tick();
  test_start_timer(1000);
  do {
    chThdWait(chThdCreateStatic(wap, WA_SIZE, prio, thread2, NULL));
    n++;
#if defined(WIN32)
    ChkIntSources();
#endif
  } while (!test_timer_done);
  test_print("--- Score : ");
  test_printn(n);
  test_println(" threads/S");
}

/* Descriptor of benchmark #5: name getter, two unused (NULL) slots and the
   benchmark body. */
const struct testcase testbmk5 = {
  bmk5_gettest,
  NULL,
  NULL,
  bmk5_execute
};

/**
 * @page test_benchmarks_006 Threads performance, create/exit only
 *
 * <h2>Description</h2>
 * Threads are continuously created and terminated in a loop. A partial
 * @p chThdCreateStatic() / @p chThdExit() cycle is performed in each
 * iteration; the @p chThdWait() is not necessary because the thread is
 * created at a higher priority so there is no need to wait for it to
 * terminate.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

/**
 * @brief Returns the name of benchmark #6.
 */
static char *bmk6_gettest(void) {
  char *title = "Benchmark, threads, create only";

  return title;
}

/**
 * @brief Benchmark #6 body.
 * @details Until the test timer expires, creates a @p thread2() thread in
 *          the first working area at priority +1 without waiting for it.
 *          Prints the number of threads created.
 */
static void bmk6_execute(void) {
  uint32_t created = 0;
  void *wap = wa[0];
  tprio_t prio = chThdGetPriority() + 1;

  test_wait_tick();
  test_start_timer(1000);
  for (;;) {
    /* The returned thread reference is intentionally not used. */
    (void)chThdCreateStatic(wap, WA_SIZE, prio, thread2, NULL);
    created++;
#if defined(WIN32)
    ChkIntSources();
#endif
    if (test_timer_done)
      break;
  }

  test_print("--- Score : ");
  test_printn(created);
  test_println(" threads/S");
}

/* Descriptor of benchmark #6: name getter, two unused (NULL) slots and the
   benchmark body. */
const struct testcase testbmk6 = {
  bmk6_gettest,
  NULL,
  NULL,
  bmk6_execute
};

/**
 * @page test_benchmarks_007 Mass reschedulation performance
 *
 * <h2>Description</h2>
 * Five threads are created and atomically rescheduled by resetting the
 * semaphore they are waiting on. The operation is performed in a
 * continuous loop.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

/**
 * @brief Semaphore waiter thread used by the mass reschedulation benchmark.
 * @details Waits on @p sem1 repeatedly; the termination request is checked
 *          before each wait.
 *
 * @param p unused
 * @return Always 0.
 */
static msg_t thread3(void *p) {

  (void)p;
  for (;;) {
    if (chThdShouldTerminate())
      break;
    chSemWait(&sem1);
  }
  return 0;
}

/**
 * @brief Returns the name of benchmark #7.
 */
static char *bmk7_gettest(void) {
  char *title = "Benchmark, mass reschedulation, 5 threads";

  return title;
}

static void bmk7_setup(void) {

  chSemInit(&sem1, 0);
}

/**
 * @brief Benchmark #7 body.
 * @details Creates five @p thread3() threads at priorities +5 down to +1
 *          relative to the value returned by @p chThdGetPriority(), then
 *          resets @p sem1 in a loop until the test timer expires. After the
 *          loop the threads are asked to terminate and the semaphore is
 *          reset once more before waiting for them. Prints the number of
 *          resets and six times that value as the context switch count.
 */
static void bmk7_execute(void) {
  uint32_t resets = 0;
  int i;

  /* Slot i gets priority offset 5 - i: +5, +4, +3, +2, +1. */
  for (i = 0; i < 5; i++) {
    threads[i] = chThdCreateStatic(wa[i], WA_SIZE,
                                   chThdGetPriority() + (5 - i),
                                   thread3, NULL);
  }

  test_wait_tick();
  test_start_timer(1000);
  for (;;) {
    chSemReset(&sem1, 0);
    resets++;
#if defined(WIN32)
    ChkIntSources();
#endif
    if (test_timer_done)
      break;
  }
  test_terminate_threads();
  /* One more reset after the termination request, before joining. */
  chSemReset(&sem1, 0);
  test_wait_threads();

  test_print("--- Score : ");
  test_printn(resets);
  test_print(" reschedulations/S, ");
  test_printn(resets * 6);
  test_println(" ctxswc/S");
}

/* Descriptor of benchmark #7: name getter, setup function (initializes
   sem1), one unused (NULL) slot and the benchmark body. */
const struct testcase testbmk7 = {
  bmk7_gettest,
  bmk7_setup,
  NULL,
  bmk7_execute
};

/**
 * @page test_benchmarks_008 I/O Queues throughput
 *
 * <h2>Description</h2>
 * Four bytes are written to and then read from an @p InputQueue in a
 * continuous loop.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

/**
 * @brief Returns the name of benchmark #8.
 */
static char *bmk8_gettest(void) {
  char *title = "Benchmark, I/O Queues throughput";

  return title;
}

/**
 * @brief Benchmark #8 body.
 * @details Initializes an input queue over a 16 bytes static buffer, then,
 *          until the test timer expires, puts the bytes 0..3 into the queue
 *          and gets four bytes back. The printed score is four times the
 *          number of loop rounds.
 */
static void bmk8_execute(void) {
  static uint8_t ib[16];
  static InputQueue iq;
  uint32_t rounds = 0;

  chIQInit(&iq, ib, sizeof(ib), NULL);
  test_wait_tick();
  test_start_timer(1000);
  for (;;) {
    /* Four puts followed by four gets per round. */
    chIQPutI(&iq, 0);
    chIQPutI(&iq, 1);
    chIQPutI(&iq, 2);
    chIQPutI(&iq, 3);
    (void)chIQGet(&iq);
    (void)chIQGet(&iq);
    (void)chIQGet(&iq);
    (void)chIQGet(&iq);
    rounds++;
#if defined(WIN32)
    ChkIntSources();
#endif
    if (test_timer_done)
      break;
  }

  test_print("--- Score : ");
  test_printn(rounds * 4);
  test_println(" bytes/S");
}

/* Descriptor of benchmark #8: name getter, two unused (NULL) slots and the
   benchmark body. */
const struct testcase testbmk8 = {
  bmk8_gettest,
  NULL,
  NULL,
  bmk8_execute
};

/**
 * @page test_benchmarks_009 Virtual Timers set/reset performance
 *
 * <h2>Description</h2>
 * A virtual timer is set and immediately reset in a continuous loop.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

/**
 * @brief Returns the name of benchmark #9.
 */
static char *bmk9_gettest(void) {
  char *title = "Benchmark, virtual timers set/reset";

  return title;
}

/**
 * @brief Empty callback passed to @p chVTSetI() by benchmark #9.
 *
 * @param param unused
 */
static void tmo(void *param) {

  (void)param;
}

/**
 * @brief Benchmark #9 body.
 * @details Until the test timer expires, sets two virtual timers (delays 1
 *          and 10000) and resets both within the same lock/unlock section.
 *          The printed score is twice the number of loop rounds.
 */
static void bmk9_execute(void) {
  static VirtualTimer vt1, vt2;
  uint32_t rounds = 0;

  test_wait_tick();
  test_start_timer(1000);
  for (;;) {
    chSysLock();
    chVTSetI(&vt1, 1, tmo, NULL);
    chVTSetI(&vt2, 10000, tmo, NULL);
    chVTResetI(&vt1);
    chVTResetI(&vt2);
    chSysUnlock();
    rounds++;
#if defined(WIN32)
    ChkIntSources();
#endif
    if (test_timer_done)
      break;
  }

  test_print("--- Score : ");
  test_printn(rounds * 2);
  test_println(" timers/S");
}

/* Descriptor of benchmark #9: name getter, two unused (NULL) slots and the
   benchmark body. */
const struct testcase testbmk9 = {
  bmk9_gettest,
  NULL,
  NULL,
  bmk9_execute
};

/**
 * @page test_benchmarks_010 Semaphores wait/signal performance
 *
 * <h2>Description</h2>
 * A counting semaphore is taken/released in a continuous loop; no Context
 * Switch happens because the counter is always non-negative.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

/**
 * @brief Returns the name of benchmark #10.
 */
static char *bmk10_gettest(void) {
  char *title = "Benchmark, semaphores wait/signal";

  return title;
}

static void bmk10_setup(void) {

  chSemInit(&sem1, 1);
}

/**
 * @brief Benchmark #10 body.
 * @details Until the test timer expires, performs four wait/signal pairs on
 *          @p sem1 per loop round. The printed score is four times the
 *          number of loop rounds.
 */
static void bmk10_execute(void) {
  uint32_t rounds = 0;

  test_wait_tick();
  test_start_timer(1000);
  for (;;) {
    /* Four wait+signal pairs per round. */
    chSemWait(&sem1);
    chSemSignal(&sem1);
    chSemWait(&sem1);
    chSemSignal(&sem1);
    chSemWait(&sem1);
    chSemSignal(&sem1);
    chSemWait(&sem1);
    chSemSignal(&sem1);
    rounds++;
#if defined(WIN32)
    ChkIntSources();
#endif
    if (test_timer_done)
      break;
  }

  test_print("--- Score : ");
  test_printn(rounds * 4);
  test_println(" wait+signal/S");
}

/* Descriptor of benchmark #10: name getter, setup function (initializes
   sem1), one unused (NULL) slot and the benchmark body. */
const struct testcase testbmk10 = {
  bmk10_gettest,
  bmk10_setup,
  NULL,
  bmk10_execute
};

#if CH_USE_MUTEXES
/**
 * @page test_benchmarks_011 Mutexes lock/unlock performance
 *
 * <h2>Description</h2>
 * A mutex is locked/unlocked in a continuous loop; no Context Switch happens
 * because there are no other threads asking for the mutex.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

/**
 * @brief Returns the name of benchmark #11.
 */
static char *bmk11_gettest(void) {
  char *title = "Benchmark, mutexes lock/unlock";

  return title;
}

static void bmk11_setup(void) {

  chMtxInit(&mtx1);
}

/**
 * @brief Benchmark #11 body.
 * @details Until the test timer expires, performs four lock/unlock pairs on
 *          @p mtx1 per loop round. The printed score is four times the
 *          number of loop rounds.
 */
static void bmk11_execute(void) {
  uint32_t rounds = 0;

  test_wait_tick();
  test_start_timer(1000);
  for (;;) {
    /* Four lock+unlock pairs per round. */
    chMtxLock(&mtx1);
    chMtxUnlock();
    chMtxLock(&mtx1);
    chMtxUnlock();
    chMtxLock(&mtx1);
    chMtxUnlock();
    chMtxLock(&mtx1);
    chMtxUnlock();
    rounds++;
#if defined(WIN32)
    ChkIntSources();
#endif
    if (test_timer_done)
      break;
  }

  test_print("--- Score : ");
  test_printn(rounds * 4);
  test_println(" lock+unlock/S");
}

/* Descriptor of benchmark #11: name getter, setup function (initializes
   mtx1), one unused (NULL) slot and the benchmark body. Only compiled when
   CH_USE_MUTEXES is enabled. */
const struct testcase testbmk11 = {
  bmk11_gettest,
  bmk11_setup,
  NULL,
  bmk11_execute
};
#endif

/*
 * Test sequence for benchmarks pattern.
 *
 * NULL-terminated array of pointers to the benchmark descriptors, in
 * execution order #1..#11. When TEST_NO_BENCHMARKS is non-zero the array
 * contains only the NULL terminator. The mutexes benchmark (#11) is listed
 * only when CH_USE_MUTEXES is enabled, matching the conditional definition
 * of testbmk11.
 */
const struct testcase * const patternbmk[] = {
#if !TEST_NO_BENCHMARKS
  &testbmk1,
  &testbmk2,
  &testbmk3,
  &testbmk4,
  &testbmk5,
  &testbmk6,
  &testbmk7,
  &testbmk8,
  &testbmk9,
  &testbmk10,
#if CH_USE_MUTEXES
  &testbmk11,
#endif
#endif
  NULL
};