The attached patch fixes some potential problems with the aacraid
driver on Linux 2.4 on SMP systems.  I haven't reviewed the 2.2 driver
source, but the same problems (#2, at least) are potentially there too.
These are things I've come across while trying to track down curious
bugs on our three PowerEdge 4400s with PERC 3/Di controllers.  Fixes #1
and #2 seem to have increased the stability of the machines somewhat;
they are still not perfect, though.


Specifically, I have addressed the following issues:

1.  __SMP__ doesn't exist any more on 2.4; it needs to be replaced
    with CONFIG_SMP for the calls to lock_kernel() to be activated -
    this patch does this replacement;

2.  The "flags" variable used in OsSpinLockAcquire and
    OsSpinLockRelease was shared between CPUs.  This means that the
    flags saved by a (second) CPU trying to acquire a spinlock whilst
    another one held it would incorrectly be restored onto the CPU
    currently holding the spinlock when it relinquished it -
    potentially re-enabling interrupts on that CPU at that time.  I've
    replaced the shared "cpu_flag" variable with a per-cpu array of
    flags ("cpu_flags") to address this issue;

3.  If, for some reason, a SendFib failed in
    AacHba_DoScsi{Read,Write}, the Fib wasn't freed.  The patch
    addresses this, and returns a QUEUE_FULL status to the upper
    layers.

    A few other minor memory leaks on error paths that I encountered
    while perusing the source have also been resolved.

You may wish to update the "official" patches with these changes.

Chris

--
Christopher Pascoe - B.E. Computer Systems (Honours)
Systems Programming Manager
School of Computer Science and Electrical Engineering
The University of Queensland   Brisbane  QLD  4072  Australia
Web: http://www.csee.uq.edu.au/~chrisp      Email: c.pascoe@csee.uq.edu.au
Phone (St. Lucia Campus): +61-7-336-52900   Fax:   +61-7-3365-2909


diff -w -r -u aac-pristine/drivers/scsi/aacraid/aachba.c linux/drivers/scsi/aacraid/aachba.c
--- aac-pristine/drivers/scsi/aacraid/aachba.c	Mon May 28 16:17:49 2001
+++ linux/drivers/scsi/aacraid/aachba.c	Mon May 28 16:26:32 2001
@@ -974,6 +974,7 @@
 
 	if( wait ) 
 	{
+		// This path shouldn't ever get executed with the current driver
 		Status = Adapter->CommFuncs.SendFib( ContainerCommand,
 											 cmd_fibcontext,
 											 FibSize,
@@ -1020,6 +1021,17 @@
 											 TRUE,
 											 ( PFIB_CALLBACK )AacHba_ReadCallback,
 											 ( void *)scsi_cmnd_ptr );
+
+		// Check that the command queued to the controller
+		if (Status != STATUS_PENDING) { 
+			cmn_err( CE_WARN, "AacHba_DoScsiRead: SendFib failed with status: %d\n", 
+					Status);
+
+			// For some reason, the Fib didn't queue, return QUEUE_FULL
+			scsi_cmnd_ptr->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | QUEUE_FULL ;
+			goto err_return;
+		}
+
 		// don't call done func here
 		return ( 0 );
 	}
@@ -1115,7 +1127,7 @@
 		  scatterlist_ptr = (struct scatterlist *)scsi_cmnd_ptr->request_buffer;
 
 		  cmn_err( CE_WARN, "\n");
-		  cmn_err( CE_WARN, "AacHba_`DoScsiWrite: WRITE request is larger than 64K");
+		  cmn_err( CE_WARN, "AacHba_DoScsiWrite: WRITE request is larger than 64K");
 		  cmn_err( CE_WARN, "AacHba_DoScsiWrite: ByteCount: %d", BlockWriteDisk->ByteCount);
 /*  		  cmn_err( CE_WARN, "AacHba_DoScsiWrite: SG ELEMENTS: %d", scsi_cmnd_ptr->use_sg); */
 /*  		  cmn_err( CE_WARN, "Dump SG Element Size..."); */
@@ -1214,6 +1226,7 @@
 
 	if( wait ) 
 	{
+		// This path shouldn't ever get executed with the current driver
 		Status = Adapter->CommFuncs.SendFib( ContainerCommand,
 											 cmd_fibcontext,
 											 FibSize,
@@ -1258,6 +1271,16 @@
 											 TRUE,
 											 ( PFIB_CALLBACK )AacHba_WriteCallback,
 											 ( void * )scsi_cmnd_ptr );
+
+		// Check that the command queued to the controller
+		if (Status != STATUS_PENDING) { 
+			cmn_err( CE_WARN, "AacHba_DoScsiWrite: SendFib failed with status: %d\n", 
+					Status);
+
+			// For some reason, the Fib didn't queue, return QUEUE_FULL
+			scsi_cmnd_ptr->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | QUEUE_FULL ;
+			goto err_return;
+		}
 
 		// don't call done func here - it should be called by the WriteCallback
 		return ( 0 );
diff -w -r -u aac-pristine/drivers/scsi/aacraid/commsup.c linux/drivers/scsi/aacraid/commsup.c
--- aac-pristine/drivers/scsi/aacraid/commsup.c	Mon May 28 16:17:49 2001
+++ linux/drivers/scsi/aacraid/commsup.c	Mon May 28 16:26:51 2001
@@ -149,8 +149,11 @@
 	// Allocate space to describe this zone segment.
 	//
 
-	cmn_err (CE_DEBUG, "Entered FsaExtendFibConextZone");
+	cmn_err (CE_DEBUG, "Entered FsaExtendFibContextZone");
 	ZoneSegment = OsAllocMemory( sizeof( FIB_CONTEXT_ZONE_SEGMENT ), OS_ALLOC_MEM_SLEEP );
+	if (ZoneSegment == NULL) {
+		return (FALSE);
+	}
 
 	ExtendSize = Adapter->FibContextZoneExtendSize;
 	ZoneSegmentAllocSize = (ExtendSize * sizeof(COMM_FIB_CONTEXT)) + sizeof(ZONE_SEGMENT_HEADER);
@@ -158,6 +161,7 @@
 	FibContextSegment = OsAllocMemory( ZoneSegmentAllocSize, OS_ALLOC_MEM_SLEEP );
 
 	if (FibContextSegment == NULL) {
+		OsFreeMemory(ZoneSegment);
 		return (FALSE);
 	}	
 
@@ -2033,6 +2037,7 @@
 
 					} else {
 
+						cmn_err (CE_WARN, "aifd: didn't allocate NewFib");
 
 					}
 
diff -w -r -u aac-pristine/drivers/scsi/aacraid/include/osheaders.h linux/drivers/scsi/aacraid/include/osheaders.h
--- aac-pristine/drivers/scsi/aacraid/include/osheaders.h	Mon May 28 16:17:49 2001
+++ linux/drivers/scsi/aacraid/include/osheaders.h	Mon May 28 16:29:04 2001
@@ -37,13 +37,13 @@
 
 #if defined( MODVERSIONS ) && defined( MODULE )
 #if DRIVER_KERNEL_CODE >= KERNEL_VERSION(2,2,12)
-#ifdef __SMP__
+#ifdef CONFIG_SMP
 #include <linux/modversions-smp.h>
 #elif defined( BOOT_DRIVER ) 
 #include <linux/modversions-BOOT.h>
 #else 
 #include <linux/modversions-up.h>
-#endif // ifdef __SMP__
+#endif // ifdef CONFIG_SMP
 #else
 #include <linux/modversions.h>
 #endif
diff -w -r -u aac-pristine/drivers/scsi/aacraid/include/ostypes.h linux/drivers/scsi/aacraid/include/ostypes.h
--- aac-pristine/drivers/scsi/aacraid/include/ostypes.h	Mon May 28 16:17:49 2001
+++ linux/drivers/scsi/aacraid/include/ostypes.h	Sat May 26 11:10:35 2001
@@ -65,7 +65,7 @@
 {
 	spinlock_t	spin_lock;
 	unsigned cpu_lock_count[NR_CPUS];
-	long cpu_flag;
+	unsigned long cpu_flags[NR_CPUS];
 	long lockout_count;
 } OS_SPINLOCK;
 
diff -w -r -u aac-pristine/drivers/scsi/aacraid/linit.c linux/drivers/scsi/aacraid/linit.c
--- aac-pristine/drivers/scsi/aacraid/linit.c	Mon May 28 16:17:49 2001
+++ linux/drivers/scsi/aacraid/linit.c	Mon May 28 16:30:23 2001
@@ -165,7 +165,7 @@
 		Announce the driver name, version and date.
  *----------------------------------------------------------------------------*/
 static void AAC_AnnounceDriver( void ){
-  printk("<1>%s, %s\n", 
+  printk(KERN_ALERT "%s, %s\n", 
 		 "aacraid raid driver version", AAC_DRIVER_BUILD_DATE );   
   schedule();
 }
@@ -265,7 +265,7 @@
 						}
 			
 
-						printk("<1>%s device detected\n", DeviceName );
+						printk(KERN_ALERT "%s device detected\n", DeviceName );
 						cmn_err(CE_DEBUG, "%x/%x/%x/%x", vendor_id, device_id, sub_vendor_id, sub_system_id);
 
 						// Increment the host adapter count
@@ -417,17 +417,18 @@
 	void ( *CompletionRoutine )( Scsi_Cmnd * ) )
 /*----------------------------------------------------------------------------*/
 {
+	int ret;
 	scsi_cmnd_ptr->scsi_done = CompletionRoutine;
 
 	// AacHba_DoScsiCmd() handles command processing, setting the 
 	// result code and calling completion routine. 
 	#ifdef SYNC_FIB
-	if( AacHba_DoScsiCmd( scsi_cmnd_ptr, 1 ) )	// called with wait = TRUE
+	if( (ret = AacHba_DoScsiCmd( scsi_cmnd_ptr, 1 )) != 0 )	// call with wait = TRUE
 	#else
-	if( AacHba_DoScsiCmd( scsi_cmnd_ptr, 0 ) )	// called with wait = FALSE
+	if( (ret = AacHba_DoScsiCmd( scsi_cmnd_ptr, 0 )) != 0 )	// call with wait = FALSE
 	#endif
 		cmn_err( CE_DEBUG, "AacHba_DoScsiCmd failed" );
-	return 0;
+	return ret;
 } 
 
 
diff -w -r -u aac-pristine/drivers/scsi/aacraid/osddi.c linux/drivers/scsi/aacraid/osddi.c
--- aac-pristine/drivers/scsi/aacraid/osddi.c	Mon May 28 16:17:49 2001
+++ linux/drivers/scsi/aacraid/osddi.c	Mon May 28 16:30:45 2001
@@ -324,7 +324,7 @@
 
 	// use_init_fs_context();  only exists in 2.2.13 onward.
 
-#ifdef __SMP__
+#ifdef CONFIG_SMP
 	lock_kernel();
 #endif
 
@@ -348,7 +348,7 @@
 	if (fs)
 		atomic_inc(&fs->count);
 
-#ifdef __SMP__
+#ifdef CONFIG_SMP
 	unlock_kernel();
 #endif
 
@@ -480,7 +480,7 @@
 	AAC_STATUS	
 --*/
 {
-	printk( "<1>AfaPortBuildSgMap: unimplemented function called" );
+	printk( KERN_ALERT "AfaPortBuildSgMap: unimplemented function called" );
 	return (STATUS_UNSUCCESSFUL);
 }
 
diff -w -r -u aac-pristine/drivers/scsi/aacraid/osfuncs.c linux/drivers/scsi/aacraid/osfuncs.c
--- aac-pristine/drivers/scsi/aacraid/osfuncs.c	Mon May 28 16:17:49 2001
+++ linux/drivers/scsi/aacraid/osfuncs.c	Mon May 28 16:31:52 2001
@@ -214,8 +214,7 @@
 	OS_SPINLOCK *SpinLock )
 /*----------------------------------------------------------------------------*/
 {
-	unsigned cpu_id, i;
-
+	unsigned cpu_id;
 
 	if( SpinLock )
 	{
@@ -225,7 +224,7 @@
 				 cpu_id, SpinLock->cpu_lock_count[ cpu_id ]);
 		}		  
 		
-		spin_lock_irqsave( &( SpinLock->spin_lock ), SpinLock->cpu_flag );
+		spin_lock_irqsave( &( SpinLock->spin_lock ), SpinLock->cpu_flags[ cpu_id ] );
 		SpinLock->cpu_lock_count[ cpu_id ]++;
 	   
 	} else {
@@ -239,10 +238,13 @@
 	OS_SPINLOCK *SpinLock )
 /*----------------------------------------------------------------------------*/
 {
+	unsigned cpu_id;
+
 	if( SpinLock )
 	{
-		SpinLock->cpu_lock_count[ smp_processor_id() ]--;
-		spin_unlock_irqrestore( &( SpinLock->spin_lock ), SpinLock->cpu_flag );
+		cpu_id = smp_processor_id();
+		SpinLock->cpu_lock_count[ cpu_id ]--;
+		spin_unlock_irqrestore( &( SpinLock->spin_lock ), SpinLock->cpu_flags[ cpu_id ] );
 	}
 	else
 		cmn_err( CE_WARN, "OsSpinLockRelease: lock does not exist" );
@@ -254,7 +256,7 @@
 	OS_SPINLOCK *SpinLock )
 /*----------------------------------------------------------------------------*/
 {
-#ifdef __SMP__
+#ifdef CONFIG_SMP
 	if( SpinLock->spin_lock.lock != 0 )
 		return( 1 );
 	else
@@ -576,7 +578,7 @@
 	va_end(ap);
 	
 	if( flag <= g_options.message_level )
-		printk("<1>%s\n", buf);
+		printk(KERN_ALERT "%s\n", buf);
 }
 
 /*  void aac_show_tasks (struct list_head *our_tasks){ */
