/* asm_support.s */
/*
 * Copyright (C) 1996-1998 by the Board of Trustees
 *    of Leland Stanford Junior University.
 * 
 * This file is part of the SimOS distribution. 
 * See LICENSE file for terms of the license. 
 *
 */

#include <asm.h>
#include <sys/regdef.h>
/*
 * Operand encodings for the MIPS `cache` instruction.  _FlushLine below
 * ORs a cache selector (CACH_*) with an operation code (C_*) to form the
 * instruction's first operand.  C_HWBINV is not referenced by any routine
 * visible in this file.
 */
#define CACH_SD         0x3     /* secondary data cache */
#define C_HINV          0x10    /* hit invalidate (all) */
#define C_HWBINV        0x14    /* hit writeback inv. (d, sd) */

/*
 * These labels are made external so that backdoor annotations can be set
 * on synchronization entry and exit.  Each name must match, character for
 * character, a label defined inside _LockEnter, _Unlock or _Barrier;
 * a .globl for a name that is never defined exports nothing.
 */
        .globl __Barrier_End_1
        .globl __Barrier_End_2
        .globl __Lock_End
        .globl __Unlock_End
        
/*
 * This file contains support routines for the solo runtime. It is 
 * intended to be linked in with the program being run. Note that 
 * it should be linked after all the application code and before
 * libc.
 */        

/*
 *       unsigned long long _LoadDouble(unsigned long long *addr);
 *
 *       Load a doubleword from addr.  The load is issued as an `ldc2`,
 *       which stands in for a "broken" ld instruction.
 *
 *       In:   a0 = addr
 */
LEAF(_LoadDouble)
        /* Assumption: the ld stand-in delivers its result in v0 and v1. */
        ldc2    v0, (a0)
        jr      ra
        END(_LoadDouble)

/*
 *       void _StoreDouble(unsigned long long *addr, unsigned long long value);
 *
 *       Store a doubleword to addr.  The store is issued as an `sdc2`,
 *       which stands in for a "broken" sd instruction.
 *
 *       In:   a0 = addr
 */
LEAF(_StoreDouble)
        /* Assumption: the sd stand-in takes its data from a2 and a3. */
        sdc2    a2, (a0)
        jr      ra
        END(_StoreDouble)

/*
 *       void _Sync(void)
 *
 *       Execute one `sync` instruction and return to the caller.
 */
LEAF(_Sync)
        sync
        jr      ra
        END(_Sync)

/*
 *       void _Prefetch(char *addr)
 *
 *       Issue a `pref` with hint 0 for the location addr points at.
 *
 *       In:   a0 = addr
 */
LEAF(_Prefetch)
        pref    0, 0(a0)
        jr      ra
        END(_Prefetch)

/*
 *       void _PrefetchX(char *addr)
 *
 *       Prefetch-exclusive: issue a `pref` with hint 1 for the location
 *       addr points at.
 *
 *       In:   a0 = addr
 */
LEAF(_PrefetchX)
        pref    1, 0(a0)
        jr      ra
        END(_PrefetchX)

/*
 * void _FlushLine(char *addr)
 *
 * Issue a single `cache` instruction on address addr with operation
 * CACH_SD|C_HINV (secondary data cache, hit invalidate).
 *
 * In:   a0 = addr
 *
 * NOTE(review): the operation used is hit-invalidate, not the
 * writeback-invalidate code C_HWBINV that is also defined in this file;
 * confirm that invalidate-only is what callers expect from a "flush".
 */
LEAF(_FlushLine)
        .set    noreorder
        cache   CACH_SD|C_HINV, 0(a0)
        .set    reorder
        jr      ra
        END(_FlushLine)

/*
 *       void _LockEnter(unsigned int *lockPtr); 
 *
 * Acquire a lock. Two different versions are provided; the preprocessor
 * conditionals below select which one is assembled.  As written, the
 * backoff version (#if 1) is the one that is built.
 *
 * The lock word is 0 when free; the acquirer stores 1 into it with an
 * LL/SC pair.  __Lock_End marks the exit point of the routine.
 */
#if 0
LEAF(_LockEnter)		# This version provides no backoff
    _Locktry:      
        ll   a1,(a0)
        bne  a1,zero,_Locktry	/* spin while the lock word is non-zero */
        li   a2,1
        sc   a2,(a0)
        beq  a2,zero,_Locktry	/* SC failed: start over */
__Lock_End:                
        j    ra                     
	END(_LockEnter)
#endif
                
#if 1
LEAF(_LockEnter)
  /* This version only backs off when the SC fails, so the routine spins
     on the LL with low latency until it really experiences contention. */
/* a2 holds the lock address (copied from a0, which is reused below for the
   SC value and the syscall argument).  a3 counts failed SC attempts and is
   handed to the pause syscall as the backoff value. */
        move a2,a0
        li   a3,0		# The backoff indicator
    _Locktry:      
        ll   a1,(a2)
        bne  a1,zero,_Locktry	/* spin while the lock word is non-zero */

    _Lockgrab:      
        li   a0,1
        sc   a0,(a2)
        bne  a0,zero,__Lock_End
				/* if SC succeeds, finish up */

 /* Hardcode "pause" system call (number 1029) to implement random backoff
    for LL -- similar to the CheckPlatform implementation below.
    NOTE(review): a3 grows by one per failed SC; how the pause call turns
    that value into a delay is not visible here.  The code also relies on
    a2 and a3 surviving the syscall -- confirm. */
    _LLBackoff:     
        li   v0, 1029
        move a0, a3		# Copy current backoff time 
        syscall
        add  a3,a3,1		# Bump backoff time

        b    _Locktry
__Lock_End:                
        j    ra                     
	END(_LockEnter)
#endif
 
        
/*
 *       void _Unlock(unsigned int *lockPtr);
 *
 *  Release a lock: execute a `sync`, then store zero into the lock word.
 *  __Unlock_End marks the exit point of the routine.
 *
 *  In:   a0 = lockPtr
 */
LEAF(_Unlock)
        sync
        sw      zero, 0(a0)
__Unlock_End:
        jr      ra
        END(_Unlock)

/*
 *	Object:
 * _Barrier(Barrier *barrier, int num_procs) ; 
 *
 *  This provides an efficient barrier.
 *
 *     Arguments:
 *      a0      barrier address (count of processes in the barrier)
 *      a0 + 4  byte that is zeroed before the LL when PREFETCH_ON_LL
 *              is defined
 *      a1      number of waiters at the barrier
 *
 *     Registers written: v0, a2, t0, t1.
 */

/*
 * These offsets assume that lock+0 and lock+4 are in the same
 * cache line while lock+128 is in a different cache line.
 */
	
#define COUNT_OFFSET    0
#define PREFETCH_OFFSET 4
#define GEN_OFFSET    128   /* On next cache line */
/* #define PREFETCH_ON_LL	*/
/*
 * _Barrier(Barrier *barrier, int num_procs)
 *
 * The arrival counter runs modulo 2*num_procs.  The arrivals that bring
 * it to num_procs, or that wrap it back to 0, bump the generation word;
 * every other arrival spins until the generation word changes.
 */
LEAF(_Barrier)	

        sync
	/*
	 *  Read the generation number for this barrier. This memory
         *  location will be changed when all have arrived.
	 */
	lw	t0, GEN_OFFSET(a0)
  loop:		
	/*
         * count = LOAD_LINKED(counter) + 1;
	 * if (count ==  2*num_proc) count = 0;
         * STORE_CONDITIONAL(counter, count);   // retry from loop on failure
         */
#ifdef PREFETCH_ON_LL	
	sb      zero, PREFETCH_OFFSET(a0)
#endif	
	ll	v0, COUNT_OFFSET(a0)
	addu	v0, v0, 1
	mul	a2, a1, 2
	bne	v0, a2, 1f
	move	v0, zero
1:
	/* sc overwrites v0 with its success flag, so keep the new count in t1 */
	move    t1, v0
	sc	v0, COUNT_OFFSET(a0)
	beq	v0, 0, loop

	/*
	 * If this is the last process to arrive, release the rest.
         * if ((count == 0) || (count == num_proc)) goto release
         */
	beq     t1, 0, release
	beq     t1, a1, release

       /*
        * while(my_gen == *cur_gen) continue;
        */
  wait:		 
	lw      t1, GEN_OFFSET(a0)
	beq     t0, t1, wait
	/* exit point for processes that waited */
__Barrier_End_1:          
	j       ra

	/*
         * cur_gen++;  // Release all waiters
         */	
  release:	
	addu    t0, t0, 1
	sw      t0, GEN_OFFSET(a0)
	/* exit point for the releasing process */
__Barrier_End_2:  
	j       ra

        END(_Barrier)

/*
 * _CheckPlatform
 *
 * Issues syscall number 1036 and returns.  The original author's note:
 * "sync" was chosen as the syscall to hardcode for this purpose, and its
 * return value is hardcoded in mipsy.
 */
LEAF(_CheckPlatform)
        li      v0, 1036
        syscall
        jr      ra
        END(_CheckPlatform)

/*
 * _Exit
 *
 * Issues syscall number 1001.  Argument registers are passed through
 * untouched; if the syscall returns, control goes back to the caller.
 */
LEAF(_Exit)
        li      v0, 1001
        syscall
        j       ra
        /* END must name the same symbol as LEAF */
        END(_Exit)


/* 
 * int _FetchAndStore(int *d, int val)
 * Atomically store val into the memory pointed to by d, and return the value
 * previously held by d         
 *
 * In:    a0 = d, a1 = val
 * Out:   v0 = value loaded from *d by the LL of the successful LL/SC pair
 * Uses:  a0, a2 (copy of d), a3 (failed-SC count), t0, t1, v0
 *
 * Assembled under .set noreorder: the instruction following each branch or
 * jump is its delay slot and executes whether or not the branch is taken.
 */

LEAF(_FetchAndStore)
        .set    noreorder
        li      a3,0		# The backoff indicator
        move    a2,a0           # Copy argument
        pref    1,(a2)		# Prefetch exclusive the lock destination 
_FASTry: 
        add     t1, a1, zero    # sc overwrites t1, so reload val each try
        ll      t0,(a2)		# Get old value
        sc   	t1,(a2)		# Store new value
        nop
        bne     t1,zero,_FASEnd # Leave if SC succeeded

 /* Hardcode "pause" system call to implement random backoff for LL --
    similar to the CheckPlatform implementation.  The li below sits in the
    delay slot of the bne above; that is harmless because v0 is rewritten
    at _FASEnd.
    NOTE(review): a3 grows by one per failed SC; how the pause call turns
    that value into a delay is not visible here.  The code also relies on
    a1, a2 and a3 surviving the syscall -- confirm. */
        li      v0, 1029
        move    a0, a3		# Copy current backoff time 
        syscall                 # Back off a random amount 
        add     a3,a3,1		# Bump backoff time

        b      _FASTry
        /* the add at _FASEnd is the delay slot of the b above; v0 is
           rewritten before it is next read */
             
_FASEnd:     
        add     v0, t0, zero    # return old value
        j    ra                     
        nop
        /* END must name the same symbol as LEAF */
END(_FetchAndStore)
        .set    reorder

/* 
 * int _CompareAndSwap(int *d, int old, int new)
 * Atomically perform this operation:
 * if (*d == old) { *d = new; return 1;} else { return 0; } 
 *
 * In:    a0 = d, a1 = old, a2 = new
 * Out:   v0 = 1 if the swap was performed, 0 if *d != old
 * Uses:  a0, a3 (failed-SC count), t0, t1, t2 (copy of d), v0
 *
 * Assembled under .set noreorder: the instruction following each branch or
 * jump is its delay slot and executes whether or not the branch is taken.
 */

LEAF(_CompareAndSwap)
        .set    noreorder
        li      a3,0		# The backoff indicator
        move    t2,a0           # Copy argument
        pref    1,(t2)		# Prefetch exclusive the lock destination
_CASTry: 
        add     t1, a2, zero    # sc overwrites t1, so reload new each try
        ll      t0,(t2)		# Get old value
        nop
        bne     t0, a1, _CASFail	
				# Check if *d == old
        nop

        sc   	t1,(t2)		# Store new value
        nop
        bne     t1,zero,_CASEnd # Leave if SC succeeded

 /* Hardcode "pause" system call to implement random backoff for LL --
    similar to the CheckPlatform implementation.  The li below sits in the
    delay slot of the bne above; that is harmless because v0 is rewritten
    at _CASEnd.
    NOTE(review): a3 grows by one per failed SC; how the pause call turns
    that value into a delay is not visible here.  The code also relies on
    a1, a2, a3 and t2 surviving the syscall -- confirm. */
        li      v0, 1029
        move    a0, a3		# Copy current backoff time 
        syscall                 # Back off a random amount 
        add     a3,a3,1		# Bump backoff time

        b      _CASTry
        /* the li at _CASEnd is the delay slot of the b above; v0 is
           rewritten before it is next read */

_CASEnd:
        li      v0, 1		# Return success
        j       ra
        nop
	
_CASFail:
        add     v0, zero, zero	# Return failure
        j       ra
        nop                                
        /* END must name the same symbol as LEAF */
END(_CompareAndSwap)
        .set    reorder


        
/*
 * _CountDown
 *
 * Busy-wait loop driven by the value in a0.  Returns immediately when a0
 * is zero; otherwise decrements a0 until it is no longer positive.
 * Assembled under .set noreorder, so each branch/jump is followed by an
 * explicit nop in its delay slot.
 */
LEAF(_CountDown)
        .set    noreorder

        beqz    a0, _CD_done            /* zero count: nothing to do */
        nop
_CD:
        addiu   a0, a0, -1
        bgtz    a0, _CD                 /* keep going while a0 > 0 */
        nop
_CD_done:
        jr      ra
        nop

END(_CountDown)
        .set    reorder

                                
/*
 *      Object:
 *              LibcStart()
 *
 *      Empty routine that only returns; its address serves as a marker
 *      for the start of the libc functions.
 *
 */
LEAF(LibcStart)
        jr      ra
        END(LibcStart)

/* WARNING - WARNING: LibcStart MUST be the last function in this
 * file.  If you add a function after it, calling that function will
 * cause the simulator to acquire the libc mutex.
 */