hw_bcopy.c 10.3 KB
/*
 * Copyright (C) 1996-1998 by the Board of Trustees
 *    of Leland Stanford Junior University.
 * 
 * This file is part of the SimOS distribution. 
 * See LICENSE file for terms of the license. 
 *
 */

#ifdef HWBCOPY
/*
 * hw_bcopy.c - Routines to simulate HWBCOPY
 */
#include <stdarg.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <malloc.h>
#include <sys/file.h>
#include <sys/param.h>

#include "sim.h"
#include "simtypes.h"
#include "simdetail.h"
#include "hw_bcopy.h"
#include "simconfig.h"
#include "simcp0.h"
#include "simmp.h"
#include "cpu_interface.h"
#include "assert.h"
#include "sim_error.h"
#include "dma.h"
#include "arch_specifics.h"
#include "machine_params.h"

/* Number of entries in each of the per-CPU psrc[] / pdst[] page tables. */
#define MAX_HWB_PAGES 8
/* Upper bound on the bytes moved by a single DMA request (see the
   MIN(HWBCOPY_BUFSIZE, remaining) computations in Simhwbcopy_dma_done). */
#define HWBCOPY_BUFSIZE SCACHE_LINE_SIZE
/* Page size in bytes used for page counting and in-page offsets. */
#define NBPP SIMCP0_PAGE_SIZE

/* RPB hack hack -- for Heinlein-style fmemcpy, 
   we need to be able to translate virtual addresses */
/* Used below to turn a virtual address into a physical one; callers
   treat a FAILURE result as a reason to abort the operation. */
extern Result TranslateVirtualNoSideeffect(int cpuNum, VA vAddr, PA *pAddr);

/*
 * State of one DMA channel, consumed by the switch in Simhwbcopy_dma_done:
 *   DMA_INITZERO  - channel just started for a block-zero operation
 *   DMA_INITCOPY  - channel just started for a block-copy operation
 *   DMA_ZERO      - a write of zeroes was the last request issued
 *   DMA_READ      - a read from the source was the last request issued
 *   DMA_WRITE     - a write to the destination was the last request issued
 *   DMA_FLAGREAD  - the read of the completion flag was the last request
 *   DMA_FLAGWRITE - the write-back of the incremented flag was the last
 *                   request; the next callback finishes the operation
 */
typedef enum { 
   DMA_INITZERO, DMA_INITCOPY, DMA_ZERO, 
   DMA_READ, DMA_WRITE, DMA_FLAGREAD, DMA_FLAGWRITE 
} HwbDmaType;

/*
 * Per-channel DMA state.  A pointer to one of these (cast to int) is the
 * argument handed to Simhwbcopy_dma_done.
 */
typedef struct {
   HwbDmaType type;      /* current state of this channel's state machine */
   DMARequest request;   /* request descriptor passed to DMAdoTransfer */
   caddr_t *src;         /* current entry in the owning HwbData's psrc[] */
   caddr_t *dst;         /* current entry in the owning HwbData's pdst[] */
   int offset;           /* value of next_offset when this channel last
                            claimed a chunk (copy operations only) */
   int srcoff;           /* byte offset within the page *src refers to */
   int dstoff;           /* byte offset within the page *dst refers to */
   /* Staging buffer used as request.data.
      NOTE(review): requests move up to HWBCOPY_BUFSIZE bytes and
      DMA_INITZERO clears HWBCOPY_BUFSIZE bytes here; confirm that
      SCACHE_LINE_SIZE never exceeds 128. */
   char buffer[128];
} HwbDmaState;

/*
 * Per-CPU state of the simulated block copy/zero engine.
 */
typedef struct {
   caddr_t vsrc;                   /* source address as passed to BlockCopy */
   caddr_t vdst;                   /* destination address as passed in */
   caddr_t psrc[MAX_HWB_PAGES];    /* translated source pages (BlockCopy) */
   caddr_t pdst[MAX_HWB_PAGES];    /* translated destination pages; BlockZero
                                      fills in entry 0 only */
   int next_offset;                /* first byte offset of the operation not
                                      yet claimed by any channel */
   int cnt;                        /* total byte count of the operation */
   caddr_t flag;                   /* K0_TO_PHYS() of the caller's flag
                                      pointer, or NULL if none was given */
   bool busy;                      /* TRUE from the start of an operation
                                      until finish or abort */
   int dma_active;                 /* channels initialised and not yet out
                                      of work */
   HwbDmaState dmastate[NUM_DMA_CHANNELS];  /* one state record per channel */
} HwbData;

/* Forward declarations for the routines defined later in this file. */
int Simhwbcopy_BlockCopy(char *, char *, int, uint64 *);
int Simhwbcopy_BlockZero(char *, int, uint64 *);
void Simhwbcopy_dma_done(int);
static void Simhwbcopy_finish(int);
static void Simhwbcopy_abort(int);

/* Cleared by Simhwbcopy_init(); not otherwise read in the visible code. */
static bool prefetch = 0;
/* When set, the MIPSY path calls ENTER_MAGIC_SECTION before starting DMA
   and Simhwbcopy_finish calls EXIT_MAGIC_SECTION.  Cleared by
   Simhwbcopy_init(). */
static bool zero_cost = 0;

/* Cycles added to the current cycle count when scheduling a transfer. */
int hwbDMADelay;
/* Number of dmastate[] channels started for each block operation. */
int hwbDMAChannels;
/* The remaining settings are not referenced in the visible portion of
   this file; presumably set and consumed elsewhere -- TODO confirm. */
int hwbDMA;
int hwprefetch;
int hwprefRange;
int hwprefDepth;

/* Engine state for every simulated CPU, indexed by CPU number. */
static HwbData HwbcopyData[SIM_MAXCPUS];

int
Simhwbcopy_init(void)
{
   int i;

   zero_cost = FALSE;
   prefetch = FALSE;

   for (i = 0; i < SIM_MAXCPUS; i++) {
      HwbcopyData[i].busy = FALSE;
      HwbcopyData[i].dma_active = 0;
   }

   return 0;
}

/*
 * Simhwbcopy_BlockZero - start a block-zero operation on the calling CPU.
 *
 *   dst   virtual address of the region to clear
 *   cnt   number of bytes to clear
 *   flag  completion flag; when NULL, NULL is recorded as the flag address
 *
 * On a MIPSY CPU the destination is translated once and every DMA channel
 * is kicked off through Simhwbcopy_dma_done.  On other CPU models a KSEG0
 * destination is cleared immediately with bzero() and the flag is bumped.
 *
 * Returns 0 when the operation was started or completed, 1 when it was
 * aborted (translation failure, or a non-KSEG0 destination on a
 * non-MIPSY CPU).
 *
 * NOTE(review): only pdst[0] is translated here, so this looks intended
 * for regions that do not need a second translation -- confirm with the
 * callers.
 */
int
Simhwbcopy_BlockZero(char *dst, int cnt, uint64 *flag)
{
   int cpu = Simmp_CPUID();
   HwbData *hwb = &HwbcopyData[cpu];
   int onMipsy;

   ASSERT(!hwb->busy);
   CPUPrint("HWB %d: Cycle %lld, BlockZero (0x%x,%d,0x%x)\n", 
            cpu, CPUVec.CycleCount(cpu), dst, cnt, flag);

   /* Claim the engine and record the request. */
   hwb->busy = TRUE;
   hwb->dma_active = 0;
   hwb->flag = (flag == NULL) ? NULL : (caddr_t) K0_TO_PHYS((uint) flag);
   hwb->vdst = dst;

   onMipsy = (SimdetailVec->myCPUType[Simmp_CPUID()] == MIPSY);

   /* Without MIPSY only unmapped (KSEG0) destinations can be handled. */
   if (!onMipsy && !IS_KSEG0(dst)) {
      Simhwbcopy_abort(cpu);
      return 1;
   }

   if (onMipsy) {
      int chan;
      Result status;

      status = TranslateVirtualNoSideeffect(cpu, (VA) dst,
                                            (PA *) &(hwb->pdst[0]));
      if (status == FAILURE) {
         Simhwbcopy_abort(cpu);
         return 1;
      }

      hwb->next_offset = 0;
      hwb->cnt = cnt;

      if (zero_cost)
         ENTER_MAGIC_SECTION(cpu);

      /* Start every channel; each one claims its first chunk. */
      for (chan = 0; chan < hwbDMAChannels; chan++) {
         HwbDmaState *state = &(hwb->dmastate[chan]);

         state->type = DMA_INITZERO;
         state->request.execCpu = cpu;
         Simhwbcopy_dma_done((int) state);
      }
   } else {
      /* Functional shortcut: clear the memory now and signal completion. */
      bzero(dst, cnt);
      (*(PHYS_TO_MEMADDR(M_FROM_CPU(cpu),hwb->flag)))++;
      Simhwbcopy_finish(cpu);
   }

   CPUPrint("HWB %d: Cycle %lld, returning control to kernel\n", 
            cpu, CPUVec.CycleCount(cpu));
   return 0;
}

/*
 * Simhwbcopy_BlockCopy - start a block-copy operation on the calling CPU.
 *
 *   src   virtual address of the source region
 *   dst   virtual address of the destination region
 *   cnt   number of bytes to copy
 *   flag  completion flag belonging to the calling CPU
 *
 * On a MIPSY CPU every source and destination page touched by the copy is
 * translated into psrc[] / pdst[] and each DMA channel is kicked off
 * through Simhwbcopy_dma_done.  On other CPU models a copy between two
 * KSEG0 addresses is performed immediately with bcopy() and the flag is
 * bumped.
 *
 * Returns 0 when the operation was started or completed.  Returns 1 when
 * it was refused or aborted: wrong flag for this CPU, a region spanning
 * more than MAX_HWB_PAGES pages, a translation failure, or non-KSEG0
 * addresses on a non-MIPSY CPU.
 */
int
Simhwbcopy_BlockCopy(char *src, char *dst, int cnt, uint64 *flag)
{
   int cpu = Simmp_CPUID();
   HwbData *data = HwbcopyData + cpu;

   /* Total unmitigated hackage: if the flag is not the right one for 
      this CPU (because the process moved at a bad time), bail out.
      NOTE(review): 0x601b82a0 and 0x80 are presumably the base address
      and per-CPU stride of the kernel's flag array -- confirm. */
   if ((((int) flag - 0x601b82a0) / 0x80) != cpu) {
      CPUPrint("HWB %d: Cycle %lld, aborting BlockCopy (0x%x,0x%x,%d,0x%x) on bad flag\n",
               cpu, CPUVec.CycleCount(cpu), src, dst, cnt, flag);
      return 1;
   }
   ASSERT(!data->busy);
   CPUPrint("HWB %d: Cycle %lld, BlockCopy (0x%x,0x%x,%d,0x%x)\n", 
            cpu, CPUVec.CycleCount(cpu), src, dst, cnt, flag);
  
   /* Claim the engine and record the request. */
   data->busy = TRUE;
   data->dma_active = 0;
   if (flag == NULL)
      data->flag = NULL;
   else
      data->flag = (caddr_t) K0_TO_PHYS((uint) flag);
   data->vsrc = src;
   data->vdst = dst;

   if (SimdetailVec->myCPUType[Simmp_CPUID()] == MIPSY) {
      int i;
      Result ret;

      /* Number of pages touched by [addr, addr + cnt - 1]. */
      int srcPages = ((((uint)src+cnt-1) / NBPP) - ((uint)src / NBPP) + 1);
      int dstPages = ((((uint)dst+cnt-1) / NBPP) - ((uint)dst / NBPP) + 1);

      /* psrc[] and pdst[] hold MAX_HWB_PAGES entries each.  Refuse a
         request that would need more rather than writing past the end
         of the tables and corrupting the rest of the per-CPU state. */
      if (srcPages > MAX_HWB_PAGES || dstPages > MAX_HWB_PAGES) {
         Simhwbcopy_abort(cpu);
         return 1;
      }

      data->next_offset = 0;
      data->cnt = cnt;

      /* Translate each source page, starting from the page boundary. */
      src -= (uint)src % NBPP;
      for (i = 0; i < srcPages; i++, src += NBPP) {
         ret = TranslateVirtualNoSideeffect(cpu,(VA)src,(PA*)&(data->psrc[i]));
         if (ret == FAILURE) {
            Simhwbcopy_abort(cpu);
            return 1;
         }
      }

      /* Same for the destination pages. */
      dst -= (uint)dst % NBPP;
      for (i = 0; i < dstPages; i++, dst += NBPP) {
         ret = TranslateVirtualNoSideeffect(cpu,(VA)dst,(PA*)&(data->pdst[i]));
         if (ret == FAILURE) {
            Simhwbcopy_abort(cpu);
            return 1;
         }
      }

      if (zero_cost) 
         ENTER_MAGIC_SECTION(cpu);
     
      /* Start every channel; each one claims its first chunk. */
      for (i = 0; i < hwbDMAChannels; i++) {
         data->dmastate[i].type = DMA_INITCOPY;
         data->dmastate[i].request.execCpu = cpu;
         Simhwbcopy_dma_done((int) &(data->dmastate[i]));
      }
   } else if (IS_KSEG0(src) && IS_KSEG0(dst)) {
      /* Functional shortcut: copy now (using the original, unmodified
         addresses saved above) and signal completion. */
      bcopy(data->vsrc, data->vdst, cnt);
      (*(PHYS_TO_MEMADDR(M_FROM_CPU(cpu),data->flag)))++;
      Simhwbcopy_finish(cpu);
   } else {
      Simhwbcopy_abort(cpu);
      return 1;
   }
   CPUPrint("HWB %d: Cycle %lld, returning control to kernel\n", 
            cpu, CPUVec.CycleCount(cpu));
   return 0;
}

/*
 * Simhwbcopy_dma_done - advance one DMA channel's state machine.
 *
 *   dmadata  pointer to the channel's HwbDmaState, cast to int
 *
 * Called directly by Simhwbcopy_BlockZero / Simhwbcopy_BlockCopy to start
 * a channel (state DMA_INITZERO / DMA_INITCOPY), and handed to
 * DMAdoTransfer as the callback for every request issued below
 * (presumably invoked when that request completes -- confirm against the
 * DMA layer).  Each case sets up the channel's next request; unless the
 * function returns early, the request is issued at the bottom.
 *
 * Chunks are handed out from the shared data->next_offset counter, so the
 * channels of one CPU interleave over the same operation.
 *
 * NOTE(review): two cases deliberately fall through (no break), and the
 * dma_done label lives inside the switch and is reached only by goto.
 */
void Simhwbcopy_dma_done(int dmadata)
{
   HwbDmaState *dmastate = (HwbDmaState *) dmadata;
   DMARequest *request = &(dmastate->request);
   int cpu = request->execCpu;
   HwbData *data = HwbcopyData + cpu;
   /* Bytes of the operation not yet claimed by any channel. */
   int remaining = data->cnt - data->next_offset;

   switch (dmastate->type) {
   case DMA_INITZERO:
      /* Zero out the data buffer for writing */
      bzero(dmastate->buffer, HWBCOPY_BUFSIZE);
      data->dma_active++;
      dmastate->type        = DMA_ZERO;
      dmastate->dst         = &(data->pdst[0]);
      request->pAddrs       = dmastate->dst;
      request->isDMAWrite   = TRUE;
      /* falls through to claim the first chunk */
   case DMA_ZERO:
      /* Nothing left to claim: this channel is finished. */
      if (remaining <= 0) goto dma_done;
      /* Claim up to HWBCOPY_BUFSIZE bytes starting at next_offset. */
      request->data         = dmastate->buffer;
      request->offset       = data->next_offset;
      request->remainingLen = MIN(HWBCOPY_BUFSIZE, remaining);
      request->amountMoved  = 0;
      data->next_offset    += request->remainingLen;
      /* The #else branch below is compiled only when
         DATA_HANDLING_notdef is defined. */
#ifndef DATA_HANDLING_notdef
#else
      bzero(PHYS_TO_MEMADDR(M_FROM_CPU(cpu), data->pdst + request->offset), 
            request->remainingLen);
#endif
      break;
   case DMA_INITCOPY:
      data->dma_active++;

      /* Start at the first page of each region, at the in-page offset
         of the original virtual addresses. */
      dmastate->src         = &(data->psrc[0]);
      dmastate->srcoff      = (uint)data->vsrc % NBPP;
      dmastate->dst         = &(data->pdst[0]);
      dmastate->dstoff      = (uint)data->vdst % NBPP;

      dmastate->offset      = 0;
      /* falls through to issue the first read */
   case DMA_WRITE:
      /* Nothing left to claim: this channel is finished. */
      if (remaining <= 0) goto dma_done;
      dmastate->type        = DMA_READ;
      request->data         = dmastate->buffer; 
      request->remainingLen = MIN(HWBCOPY_BUFSIZE, remaining);
      request->amountMoved  = 0;

      /* Update src and dst offsets and address pointers */
      /* The advance is the distance from this channel's previous chunk
         (dmastate->offset) to the chunk being claimed now. */
      dmastate->srcoff     += data->next_offset - dmastate->offset;
      dmastate->dstoff     += data->next_offset - dmastate->offset;
      while (dmastate->srcoff >= NBPP) {
         dmastate->src++;
         dmastate->srcoff -= NBPP;
      }
      while (dmastate->dstoff >= NBPP) {
         dmastate->dst++;
         dmastate->dstoff -= NBPP;
      }
      dmastate->offset      = data->next_offset;

      /* Read the chunk from the source page into the staging buffer. */
      request->pAddrs       = dmastate->src;
      request->offset       = dmastate->srcoff;
      request->isDMAWrite   = FALSE;
      data->next_offset    += request->remainingLen;
      break;
   case DMA_READ:
      /* Write the bytes reported in amountMoved from the staging buffer
         to the destination page. */
      dmastate->type        = DMA_WRITE;
      request->data         = dmastate->buffer;
      request->remainingLen = request->amountMoved;
      request->amountMoved  = 0;
      request->pAddrs       = dmastate->dst;
      request->offset       = dmastate->dstoff;
      request->isDMAWrite   = TRUE;
      /* The #else branch below is compiled only when
         DATA_HANDLING_notdef is defined. */
#ifndef DATA_HANDLING_notdef
#else
      bcopy(PHYS_TO_MEMADDR(M_FROM_CPU(cpu), data->psrc + request->offset), 
            PHYS_TO_MEMADDR(M_FROM_CPU(cpu), data->pdst + request->offset), 
            request->remainingLen);
#endif
      break;
   dma_done:
      /* If there's still active DMA processes, just return */
      if (--(data->dma_active) > 0) return;
      /* Last active channel: read the 8-byte flag into the buffer.
         NOTE(review): if cnt <= 0 each channel started by the caller
         takes dma_active from 1 back to 0, so this flag sequence may be
         started once per channel -- confirm whether that can happen. */
      dmastate->type        = DMA_FLAGREAD;
      request->pAddrs       = &(data->flag);
      request->data         = dmastate->buffer;
      request->offset       = 0;
      request->remainingLen = sizeof(uint64);
      request->amountMoved  = 0;
      request->isDMAWrite   = FALSE;
      break;
   case DMA_FLAGREAD:
      /* Increment the flag value held in the buffer and write it back;
         request->pAddrs is not reassigned here and still refers to
         data->flag. */
      (*((uint64 *) dmastate->buffer))++;
      dmastate->type        = DMA_FLAGWRITE;
      request->data         = dmastate->buffer;
      request->offset       = 0;
      request->remainingLen = sizeof(uint64);
      request->amountMoved  = 0;
      request->isDMAWrite   = TRUE;
      /* The #else branch below is compiled only when
         DATA_HANDLING_notdef is defined. */
#ifndef DATA_HANDLING_notdef
#else
      (*(uint64 *)flag)++;
#endif
       break;
   case DMA_FLAGWRITE:
      /* Nothing further to issue: the operation is complete. */
      Simhwbcopy_finish(cpu);
      return;
   }
   SIM_DEBUG(('g', "HWB %d: Cycle %lld, DMAdoTransfer, addr = 0x%x, "
              "length = 0x%x, write = %d\n",
              cpu, CPUVec.CycleCount(cpu),
              ((int)*(request->pAddrs))+request->offset,
              request->remainingLen, request->isDMAWrite));
   
   /* Issue the request prepared above, timed hwbDMADelay cycles after the
      current cycle, with this function as its callback. */
   DMAdoTransfer(cpu, request,
                 (SimTime) CPUVec.CycleCount(cpu) + hwbDMADelay,
                 Simhwbcopy_dma_done, (int) dmastate, cpu);
}

/*
 * Simhwbcopy_finish - mark the operation on `cpu' as successfully done.
 *
 * Releases the engine (busy = FALSE), leaves the magic section when the
 * zero_cost option is on, and logs the completion.
 */
static void
Simhwbcopy_finish(int cpu)
{
   HwbcopyData[cpu].busy = FALSE;

   if (zero_cost)
      EXIT_MAGIC_SECTION(cpu);

   CPUPrint("HWB %d: Cycle %lld, done with HWBCOPY operation\n", 
            cpu, CPUVec.CycleCount(cpu));
}

/*
 * Simhwbcopy_abort - give up on the operation in progress on `cpu'.
 *
 * Releases the engine (busy = FALSE) and logs the abort.  Unlike
 * Simhwbcopy_finish it does not touch the magic section.
 */
static void
Simhwbcopy_abort(int cpu)
{
   HwbcopyData[cpu].busy = FALSE;

   CPUPrint("HWB %d: Cycle %lld, aborting HWBCOPY operation\n", 
            cpu, CPUVec.CycleCount(cpu));
}

#endif