drivers/i2o/i2o_block.c
1 /*
2  * I2O Random Block Storage Class OSM
3  *
4  * (C) Copyright 1999 Red Hat Software
5  *      
6  * Written by Alan Cox, Building Number Three Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License
10  * as published by the Free Software Foundation; either version
11  * 2 of the License, or (at your option) any later version.
12  *
13  * This is a beta test release. Most of the good code was taken
14  * from the nbd driver by Pavel Machek, who in turn took some of it
15  * from loop.c. Isn't free software great for reusability 8)
16  *
17  * Fixes/additions:
18  *      Steve Ralston:  
19  *              Multiple device handling error fixes,
20  *              Added a queue depth.
21  *      Alan Cox:       
22  *              FC920 has an rmw bug. Don't OR in the end marker.
23  *              Removed queue walk, fixed for 64bitness.
24  *      Deepak Saxena:
25  *              Independent queues per IOP
26  *              Support for dynamic device creation/deletion
27  *              Code cleanup    
28  *              Support for larger I/Os through merge* functions 
29  *              (taken from DAC960 driver)
30  *      Boji T Kannanthanam:
31  *              Set the I2O Block devices to be detected in increasing 
32  *              order of TIDs during boot.
33  *              Search and set the I2O block device that we boot off from  as
34  *              the first device to be claimed (as /dev/i2o/hda)
35  *              Properly attach/detach I2O gendisk structure from the system
36  *              gendisk list. The I2O block devices now appear in 
37  *              /proc/partitions.
38  *
39  *      To do:
40  *              Serial number scanning to find duplicates for FC multipathing
41  */
42
43 #include <linux/major.h>
44
45 #include <linux/module.h>
46
47 #include <linux/sched.h>
48 #include <linux/fs.h>
49 #include <linux/stat.h>
50 #include <linux/errno.h>
51 #include <linux/file.h>
52 #include <linux/ioctl.h>
53 #include <linux/i2o.h>
54 #include <linux/blkdev.h>
55 #include <linux/blkpg.h>
56 #include <linux/slab.h>
57 #include <linux/hdreg.h>
58
59 #include <linux/notifier.h>
60 #include <linux/reboot.h>
61
62 #include <asm/uaccess.h>
63 #include <asm/semaphore.h>
64 #include <asm/io.h>
65 #include <asm/atomic.h>
66 #include <linux/smp_lock.h>
67 #include <linux/wait.h>
68
69 #define MAJOR_NR I2O_MAJOR
70
71 #include <linux/blk.h>
72
73 #define MAX_I2OB        16
74
75 #define MAX_I2OB_DEPTH  128
76 #define MAX_I2OB_RETRIES 4
77
78 //#define DRIVERDEBUG
79 #ifdef DRIVERDEBUG
80 #define DEBUG( s ) printk( s )
81 #else
82 #define DEBUG( s )
83 #endif
84
85 /*
86  * Events that this OSM is interested in
87  */
88 #define I2OB_EVENT_MASK         (I2O_EVT_IND_BSA_VOLUME_LOAD |  \
89                                  I2O_EVT_IND_BSA_VOLUME_UNLOAD | \
90                                  I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ | \
91                                  I2O_EVT_IND_BSA_CAPACITY_CHANGE | \
92                                  I2O_EVT_IND_BSA_SCSI_SMART )
93
94
95 /*
96  * I2O Block Error Codes - should be in a header file really...
97  */
98 #define I2O_BSA_DSC_SUCCESS             0x0000
99 #define I2O_BSA_DSC_MEDIA_ERROR         0x0001
100 #define I2O_BSA_DSC_ACCESS_ERROR        0x0002
101 #define I2O_BSA_DSC_DEVICE_FAILURE      0x0003
102 #define I2O_BSA_DSC_DEVICE_NOT_READY    0x0004
103 #define I2O_BSA_DSC_MEDIA_NOT_PRESENT   0x0005
104 #define I2O_BSA_DSC_MEDIA_LOCKED        0x0006
105 #define I2O_BSA_DSC_MEDIA_FAILURE       0x0007
106 #define I2O_BSA_DSC_PROTOCOL_FAILURE    0x0008
107 #define I2O_BSA_DSC_BUS_FAILURE         0x0009
108 #define I2O_BSA_DSC_ACCESS_VIOLATION    0x000A
109 #define I2O_BSA_DSC_WRITE_PROTECTED     0x000B
110 #define I2O_BSA_DSC_DEVICE_RESET        0x000C
111 #define I2O_BSA_DSC_VOLUME_CHANGED      0x000D
112 #define I2O_BSA_DSC_TIMEOUT             0x000E
113
114 /*
115  *      Some of these can be made smaller later
116  */
117
118 static int i2ob_blksizes[MAX_I2OB<<4];
119 static int i2ob_hardsizes[MAX_I2OB<<4];
120 static int i2ob_sizes[MAX_I2OB<<4];
121 static int i2ob_media_change_flag[MAX_I2OB];
122 static u32 i2ob_max_sectors[MAX_I2OB<<4];
123
124 static int i2ob_context;
125
126 /*
127  * I2O Block device descriptor 
128  */
129 struct i2ob_device
130 {
131         struct i2o_controller *controller;
132         struct i2o_device *i2odev;
133         int unit;
134         int tid;
135         int flags;
136         int refcnt;
137         struct request *head, *tail;
138         request_queue_t *req_queue;
139         int max_segments;
140         int done_flag;
141         int constipated;
142         int depth;
143 };
144
145 /*
146  *      FIXME:
147  *      We should cache align these to avoid ping-ponging lines on SMP
148  *      boxes under heavy I/O load...
149  */
150 struct i2ob_request
151 {
152         struct i2ob_request *next;
153         struct request *req;
154         int num;
155 };
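/*
 * A minimal sketch of what the FIXME above asks for (illustrative
 * only, hence disabled): pad each request slot to its own cache line
 * with the ____cacheline_aligned annotation from <linux/cache.h>, so
 * that CPUs completing adjacent slots don't ping-pong the same line.
 */
#if 0
#include <linux/cache.h>

struct i2ob_request
{
	struct i2ob_request *next;
	struct request *req;
	int num;
} ____cacheline_aligned;
#endif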
156
157 /*
158  * Per-IOP request queue information
159  *
160  * We have a separate request_queue_t per IOP so that a heavily
161  * loaded I2O block device on one IOP does not starve block devices
162  * across all I2O controllers.
163  * 
164  */
165 struct i2ob_iop_queue
166 {
167         atomic_t queue_depth;
168         struct i2ob_request request_queue[MAX_I2OB_DEPTH];
169         struct i2ob_request *i2ob_qhead;
170         request_queue_t req_queue;
171 };
172 static struct i2ob_iop_queue *i2ob_queues[MAX_I2O_CONTROLLERS];
173 static struct i2ob_request *i2ob_backlog[MAX_I2O_CONTROLLERS];
174 static struct i2ob_request *i2ob_backlog_tail[MAX_I2O_CONTROLLERS];
175
176 /*
177  *      Each I2O disk is one of these.
178  */
179
180 static struct i2ob_device i2ob_dev[MAX_I2OB<<4];
181 static int i2ob_dev_count = 0;
182 static struct hd_struct i2ob[MAX_I2OB<<4];
183 static struct gendisk i2ob_gendisk;     /* Declared later */
184
185 /*
186  * Mutex and spin lock for event handling synchronization
187  * evt_msg contains the last event.
188  */
189 static DECLARE_MUTEX_LOCKED(i2ob_evt_sem);
190 static DECLARE_MUTEX_LOCKED(i2ob_thread_dead);
191 static spinlock_t i2ob_evt_lock = SPIN_LOCK_UNLOCKED;
192 static u32 evt_msg[MSG_FRAME_SIZE>>2];
193
194 static struct timer_list i2ob_timer;
195 static int i2ob_timer_started = 0;
196
197 static void i2o_block_reply(struct i2o_handler *, struct i2o_controller *,
198          struct i2o_message *);
199 static void i2ob_new_device(struct i2o_controller *, struct i2o_device *);
200 static void i2ob_del_device(struct i2o_controller *, struct i2o_device *);
201 static void i2ob_reboot_event(void);
202 static int i2ob_install_device(struct i2o_controller *, struct i2o_device *, int);
203 static void i2ob_end_request(struct request *);
204 static void i2ob_request(request_queue_t *);
205 static int i2ob_backlog_request(struct i2o_controller *, struct i2ob_device *);
206 static int i2ob_init_iop(unsigned int);
207 static request_queue_t* i2ob_get_queue(kdev_t);
208 static int i2ob_query_device(struct i2ob_device *, int, int, void*, int);
209 static int do_i2ob_revalidate(kdev_t, int);
210 static int i2ob_evt(void *);
211
212 static int evt_pid = 0;
213 static int evt_running = 0;
214 static int scan_unit = 0;
215
216 /*
217  * I2O OSM registration structure...keeps getting bigger and bigger :)
218  */
219 static struct i2o_handler i2o_block_handler =
220 {
221         i2o_block_reply,
222         i2ob_new_device,
223         i2ob_del_device,
224         i2ob_reboot_event,
225         "I2O Block OSM",
226         0,
227         I2O_CLASS_RANDOM_BLOCK_STORAGE
228 };
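/*
 * Sketch of how this handler presumably gets registered at module
 * init (the real call lives in init code outside this excerpt;
 * i2o_install_handler() is the 2.4 i2o_core registration entry point
 * and fills in the context we stash in i2ob_context):
 */
#if 0
	if (i2o_install_handler(&i2o_block_handler) < 0) {
		printk(KERN_ERR "i2o_block: unable to register OSM.\n");
		return -EINVAL;
	}
	i2ob_context = i2o_block_handler.context;
#endif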
229
230 /*
231  *      Get a message
232  */
233
234 static u32 i2ob_get(struct i2ob_device *dev)
235 {
236         struct i2o_controller *c=dev->controller;
237         return I2O_POST_READ32(c);
238 }
239  
240 /*
241  *      Turn a Linux block request into an I2O block read/write.
242  */
243
244 static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, u32 base, int unit)
245 {
246         struct i2o_controller *c = dev->controller;
247         int tid = dev->tid;
248         unsigned long msg;
249         unsigned long mptr;
250         u64 offset;
251         struct request *req = ireq->req;
252         struct buffer_head *bh = req->bh;
253         int count = req->nr_sectors<<9;
254         char *last = NULL;
255         unsigned short size = 0;
256
257         // printk(KERN_INFO "i2ob_send called\n");
258         /* Map the message to a virtual address */
259         msg = c->mem_offset + m;
260         
261         /*
262          * Build the message based on the request.
263          */
264         __raw_writel(i2ob_context|(unit<<8), msg+8);
265         __raw_writel(ireq->num, msg+12);
266         __raw_writel(req->nr_sectors << 9, msg+20);
267
268         /* 
269          * Mask out partitions from now on
270          */
271         unit &= 0xF0;
272                 
273         /* This can be optimised later - just want to be sure it's right
274            for starters */
275         offset = ((u64)(req->sector+base)) << 9;
276         __raw_writel( offset & 0xFFFFFFFF, msg+24);
277         __raw_writel(offset>>32, msg+28);
278         mptr=msg+32;
279         
280         if(req->cmd == READ)
281         {
282                 __raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4);
283                 while(bh!=NULL)
284                 {
285                         if(bh->b_data == last) {
286                                 size += bh->b_size;
287                                 last += bh->b_size;
288                                 if(bh->b_reqnext)
289                                         __raw_writel(0x14000000|(size), mptr-8);
290                                 else
291                                         __raw_writel(0xD4000000|(size), mptr-8);
292                         }
293                         else
294                         {
295                                 if(bh->b_reqnext)
296                                         __raw_writel(0x10000000|(bh->b_size), mptr);
297                                 else
298                                         __raw_writel(0xD0000000|(bh->b_size), mptr);
299                                 __raw_writel(virt_to_bus(bh->b_data), mptr+4);
300                                 mptr += 8;      
301                                 size = bh->b_size;
302                                 last = bh->b_data + size;
303                         }
304
305                         count -= bh->b_size;
306                         bh = bh->b_reqnext;
307                 }
308                 /*
309                  *      Heuristic for now since the block layer doesn't give
310                  *      us enough info. If it's a big read, assume sequential
311                  *      readahead on the controller. If it's small then don't read
312                  *      ahead but do use the controller cache.
313                  */
314                 if(size >= 8192)
315                         __raw_writel((8<<24)|(1<<16)|8, msg+16);
316                 else
317                         __raw_writel((8<<24)|(1<<16)|4, msg+16);
318         }
319         else if(req->cmd == WRITE)
320         {
321                 __raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4);
322                 while(bh!=NULL)
323                 {
324                         if(bh->b_data == last) {
325                                 size += bh->b_size;
326                                 last += bh->b_size;
327                                 if(bh->b_reqnext)
328                                         __raw_writel(0x14000000|(size), mptr-8);
329                                 else
330                                         __raw_writel(0xD4000000|(size), mptr-8);
331                         }
332                         else
333                         {
334                                 if(bh->b_reqnext)
335                                         __raw_writel(0x14000000|(bh->b_size), mptr);
336                                 else
337                                         __raw_writel(0xD4000000|(bh->b_size), mptr);
338                                 __raw_writel(virt_to_bus(bh->b_data), mptr+4);
339                                 mptr += 8;      
340                                 size = bh->b_size;
341                                 last = bh->b_data + size;
342                         }
343
344                         count -= bh->b_size;
345                         bh = bh->b_reqnext;
346                 }
347
348                 if(c->battery)
349                 {
350                         
351                         if(size>16384)
352                                 __raw_writel(4, msg+16);
353                         else
354                                 /* 
355                                  * Allow replies to come back once data is cached in the controller
356                                  * This allows us to handle writes quickly thus giving more of the
357                                  * queue to reads.
358                                  */
359                                 __raw_writel(16, msg+16);
360                 }
361                 else
362                 {
363                         /* Large write, don't cache */
364                         if(size>8192)
365                                 __raw_writel(4, msg+16);
366                         else
367                         /* write through */
368                                 __raw_writel(8, msg+16);
369                 }
370         }
371         __raw_writel(I2O_MESSAGE_SIZE(mptr-msg)>>2 | SGL_OFFSET_8, msg);
372         
373         if(count != 0)
374         {
375                 printk(KERN_ERR "Request count botched by %d.\n", count);
376         }
377
378         i2o_post_message(c,m);
379         atomic_inc(&i2ob_queues[c->unit]->queue_depth);
380
381         return 0;
382 }
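/*
 * For reference, the scatter-gather flag words hand-built above, as I
 * read the I2O simple SGL element encoding (annotation, not spec):
 *
 *	0x10000000	simple address element
 *	0x04000000	direction bit (data out, i.e. writes)
 *	0x40000000	end of buffer
 *	0x80000000	last element
 *
 * So 0xD0000000 marks the final read element (simple|EOB|last) and
 * 0xD4000000 the final write element. The low bits carry the byte
 * count, and the FC920 rmw erratum noted in the header is why the end
 * marker is written outright rather than OR-ed into place.
 */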
383
384 /*
385  *      Return a completed request slot to the per-IOP free list. The free
386  *      list is a simple stack threaded through the request table. Caller
387  *      must hold the lock.
388  */
389  
390 static inline void i2ob_unhook_request(struct i2ob_request *ireq, 
391         unsigned int iop)
392 {
393         ireq->next = i2ob_queues[iop]->i2ob_qhead;
394         i2ob_queues[iop]->i2ob_qhead = ireq;
395 }
396
397 /*
398  *      Request completion handler
399  */
400  
401 static inline void i2ob_end_request(struct request *req)
402 {
403         /*
404          * Loop until all of the buffers that are linked
405          * to this request have been marked updated and
406          * unlocked.
407          */
408
409         while (end_that_request_first( req, !req->errors, "i2o block" ));
410
411         /*
412          * It is now ok to complete the request.
413          */
414         end_that_request_last( req );
415 }
416
417 /*
418  * Request merging functions
419  */
420 static inline int i2ob_new_segment(request_queue_t *q, struct request *req,
421                                   int __max_segments)
422 {
423         int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments;
424
425         if (__max_segments < max_segments)
426                 max_segments = __max_segments;
427
428         if (req->nr_segments < max_segments) {
429                 req->nr_segments++;
430                 return 1;
431         }
432         return 0;
433 }
434
435 static int i2ob_back_merge(request_queue_t *q, struct request *req, 
436                              struct buffer_head *bh, int __max_segments)
437 {
438         if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
439                 return 1;
440         return i2ob_new_segment(q, req, __max_segments);
441 }
442
443 static int i2ob_front_merge(request_queue_t *q, struct request *req, 
444                               struct buffer_head *bh, int __max_segments)
445 {
446         if (bh->b_data + bh->b_size == req->bh->b_data)
447                 return 1;
448         return i2ob_new_segment(q, req, __max_segments);
449 }
450
451 static int i2ob_merge_requests(request_queue_t *q,
452                                 struct request *req,
453                                 struct request *next,
454                                 int __max_segments)
455 {
456         int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments;
457         int total_segments = req->nr_segments + next->nr_segments;
458
459         if (__max_segments < max_segments)
460                 max_segments = __max_segments;
461
462         if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
463                 total_segments--;
464     
465         if (total_segments > max_segments)
466                 return 0;
467
468         req->nr_segments = total_segments;
469         return 1;
470 }
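/*
 * Worked example of the accounting above, assuming max_segments is 8:
 * merging a 5-segment request with a 4-segment one would normally
 * total 9 and be rejected, but if the tail buffer of the first is
 * contiguous with the head buffer of the second they fuse into one
 * segment, the total drops to 8, and the merge is allowed.
 */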
471
472 static int i2ob_flush(struct i2o_controller *c, struct i2ob_device *d, int unit)
473 {
474         unsigned long msg;
475         u32 m = i2ob_get(d);
476         
477         if(m == 0xFFFFFFFF)
478                 return -1;
479                 
480         msg = c->mem_offset + m;
481
482         /*
483          *      Ask the controller to write the cache back. This sorts out
484          *      the supertrak firmware flaw and also does roughly the right
485          *      thing for other cases too.
486          */
487                 
488         __raw_writel(FIVE_WORD_MSG_SIZE|SGL_OFFSET_0, msg);
489         __raw_writel(I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|d->tid, msg+4);
490         __raw_writel(i2ob_context|(unit<<8), msg+8);
491         __raw_writel(0, msg+12);
492         __raw_writel(60<<16, msg+16);
493         
494         i2o_post_message(c,m);
495         return 0;
496 }
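/*
 * The five message words built above, for reference (the same layout
 * recurs throughout this file):
 *
 *	msg+0	message size and SGL offset (no SGL here)
 *	msg+4	command (BLOCK_CFLUSH) with initiator and target TIDs
 *	msg+8	initiator context: i2ob_context plus the unit in bits 8+
 *	msg+12	transaction context (unused here, so 0)
 *	msg+16	timeout value (60) in the high 16 bits
 */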
497                         
498 /*
499  *      OSM reply handler. This gets all the message replies
500  */
501
502 static void i2o_block_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *msg)
503 {
504         unsigned long flags;
505         struct i2ob_request *ireq = NULL;
506         u8 st;
507         u32 *m = (u32 *)msg;
508         u8 unit = (m[2]>>8)&0xF0;       /* low 4 bits are partition */
509         struct i2ob_device *dev = &i2ob_dev[(unit&0xF0)];
510         
511         /*
512          * FAILed message
513          */
514         if(m[0] & (1<<13))
515         {
516                 /*
517                  * FAILed message from controller
518                  * We increment the error count and abort it
519                  *
520                  * In theory this will never happen.  The I2O block class
521                  * specification states that block devices never return
522                  * FAILs but instead use the REQ status field...but
523                  * better be on the safe side since no one really follows
524                  * the spec to the book :)
525                  */
526                 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
527                 ireq->req->errors++;
528
529                 spin_lock_irqsave(&io_request_lock, flags);
530                 i2ob_unhook_request(ireq, c->unit);
531                 i2ob_end_request(ireq->req);
532                 spin_unlock_irqrestore(&io_request_lock, flags);
533         
534                 /* Now flush the message by making it a NOP */
535                 m[0]&=0x00FFFFFF;
536                 m[0]|=(I2O_CMD_UTIL_NOP)<<24;
537                 i2o_post_message(c,virt_to_bus(m));
538
539                 return;
540         }
541
542         if(msg->function == I2O_CMD_UTIL_EVT_REGISTER)
543         {
544                 spin_lock(&i2ob_evt_lock);
545                 memcpy(evt_msg, msg, (m[0]>>16)<<2);
546                 spin_unlock(&i2ob_evt_lock);
547                 up(&i2ob_evt_sem);
548                 return;
549         }
550
551         if(msg->function == I2O_CMD_BLOCK_CFLUSH)
552         {
553                 spin_lock_irqsave(&io_request_lock, flags);
554                 dev->constipated=0;
555                 DEBUG(("unconstipated\n"));
556                 if(i2ob_backlog_request(c, dev)==0)
557                         i2ob_request(dev->req_queue);
558                 spin_unlock_irqrestore(&io_request_lock, flags);
559                 return;
560         }
561
562         if(!dev->i2odev)
563         {
564                 /*
565                  * This is a HACK, but Intel Integrated RAID allows the user
566                  * to delete a volume that is claimed, locked, and in use 
567                  * by the OS. We have to check for a reply from a
568                  * non-existent device and flag it as an error or the system 
569                  * goes kaput...
570                  */
571                 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
572                 ireq->req->errors++;
573                 printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n");
574                 spin_lock_irqsave(&io_request_lock, flags);
575                 i2ob_unhook_request(ireq, c->unit);
576                 i2ob_end_request(ireq->req);
577                 spin_unlock_irqrestore(&io_request_lock, flags);
578                 return;
579         }       
580
581         /*
582          *      Lets see what is cooking. We stuffed the
583          *      request in the context.
584          */
585                  
586         ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
587         st=m[4]>>24;
588
589         if(st!=0)
590         {
591                 int err;
592                 char *bsa_errors[] = 
593                 { 
594                         "Success", 
595                         "Media Error", 
596                         "Failure communicating to device",
597                         "Device Failure",
598                         "Device is not ready",
599                         "Media not present",
600                         "Media is locked by another user",
601                         "Media has failed",
602                         "Failure communicating to device",
603                         "Device bus failure",
604                         "Device is locked by another user",
605                         "Device is write protected",
606                         "Device has reset",
607                         "Volume has changed, waiting for acknowledgement"
608                 };
609                                 
610                 err = m[4]&0xFFFF;
611                 
612                  *      Device not ready means two things. One is that the
613                  *      thing went offline (but not a removable-media eject).
614                  *
615                  *      The second is that you have a SuperTrak 100 and the
616                  *      firmware got constipated. Unlike standard I2O card
617                  *      setups, the SuperTrak returns an error rather than
618                  *      blocking for the timeout in these cases.
619                  *      blocking for the timeout in these cases.
620                  */
621                  
622                 
623                 spin_lock_irqsave(&io_request_lock, flags);
624                 if(err==4)
625                 {
626                         /*
627                          *      Time to uncork stuff
628                          */
629                         
630                         if(!dev->constipated)
631                         {
632                                 dev->constipated = 1;
633                                 DEBUG(("constipated\n"));
634                                 /* Now pull the chain */
635                                 if(i2ob_flush(c, dev, unit)<0)
636                                 {
637                                         DEBUG(("i2ob: Unable to queue flush. Retrying I/O immediately.\n"));
638                                         dev->constipated=0;
639                                 }
640                                 DEBUG(("flushing\n"));
641                         }
642                         
643                         /*
644                          *      Recycle the request
645                          */
646                          
647 //                      i2ob_unhook_request(ireq, c->unit);
648                         
649                         /*
650                          *      Place it on the recycle queue
651                          */
652                          
653                         ireq->next = NULL;
654                         if(i2ob_backlog_tail[c->unit]!=NULL)
655                                 i2ob_backlog_tail[c->unit]->next = ireq;
656                         else
657                                 i2ob_backlog[c->unit] = ireq;                   
658                         i2ob_backlog_tail[c->unit] = ireq;
659                         
660                         atomic_dec(&i2ob_queues[c->unit]->queue_depth);
661
662                         /*
663                          *      If the constipator flush failed we want to
664                          *      poke the queue again. 
665                          */
666                          
667                         i2ob_request(dev->req_queue);
668                         spin_unlock_irqrestore(&io_request_lock, flags);
669                         
670                         /*
671                          *      and out
672                          */
673                          
674                         return; 
675                 }
676                 spin_unlock_irqrestore(&io_request_lock, flags);
677                 printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name, 
678                         err < 14 ? bsa_errors[err] : "Unknown error");
679                 if(m[4]&0x00FF0000)
680                         printk(" - DDM attempted %d retries", (m[4]>>16)&0x00FF );
681                 printk(".\n");
682                 ireq->req->errors++;    
683         }
684         else
685                 ireq->req->errors = 0;
686
687         /*
688          *      Dequeue the request. We use irqsave locks as one day we
689          *      may be running polled controllers from a BH...
690          */
691         
692         spin_lock_irqsave(&io_request_lock, flags);
693         i2ob_unhook_request(ireq, c->unit);
694         i2ob_end_request(ireq->req);
695         atomic_dec(&i2ob_queues[c->unit]->queue_depth);
696
697         /*
698          *      We may be able to do more I/O
699          */
700
701         if(i2ob_backlog_request(c, dev)==0)
702                 i2ob_request(dev->req_queue);
703
704         spin_unlock_irqrestore(&io_request_lock, flags);
705 }
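/*
 * For reference, the reply words decoded above (32-bit word offsets
 * into the message frame, as used by this driver):
 *
 *	m[0]	version/flags; bit 13 set means a FAILed message
 *	m[2]	initiator context (i2ob_context | unit<<8)
 *	m[3]	transaction context: index into the per-IOP request table
 *	m[4]	bits 31:24 status, 23:16 retry count, 15:0 detailed status
 */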
706
707 /* 
708  * Event handler.  Needs to be a separate thread b/c we may have
709  * to do things like scan a partition table, or query parameters
710  * which cannot be done from an interrupt or from a bottom half.
711  */
712 static int i2ob_evt(void *dummy)
713 {
714         unsigned int evt;
715         unsigned long flags;
716         int unit;
717         int i;
718         //The only event that has data is the SCSI_SMART event.
719         struct i2o_reply {
720                 u32 header[4];
721                 u32 evt_indicator;
722                 u8 ASC;
723                 u8 ASCQ;
724                 u8 data[16];
725                 } *evt_local;
726
727         lock_kernel();
728         daemonize();
729         unlock_kernel();
730
731         strcpy(current->comm, "i2oblock");
732         evt_running = 1;
733
734         while(1)
735         {
736                 if(down_interruptible(&i2ob_evt_sem))
737                 {
738                         evt_running = 0;
739                         printk("exiting...");
740                         break;
741                 }
742
743                 /*
744                  * Keep another CPU/interrupt from overwriting the 
745                  * message while we're reading it
746                  *
747                  * We stuffed the unit in the TxContext and grab the event mask.
748                  * None of the BSA events we care about carry EventData.
749                  */
750                 spin_lock_irqsave(&i2ob_evt_lock, flags);
751                 evt_local = (struct i2o_reply *)evt_msg;
752                 spin_unlock_irqrestore(&i2ob_evt_lock, flags);
753
754                 unit = evt_local->header[3];
755                 evt = evt_local->evt_indicator;
756
757                 switch(evt)
758                 {
759                         /*
760                          * New volume loaded on same TID, so we just re-install.
761                          * The TID/controller don't change as it is the same
762                          * I2O device.  It's just new media that we have to
763                          * rescan.
764                          */
765                         case I2O_EVT_IND_BSA_VOLUME_LOAD:
766                         {
767                                 i2ob_install_device(i2ob_dev[unit].i2odev->controller, 
768                                         i2ob_dev[unit].i2odev, unit);
769                                 break;
770                         }
771
772                         /*
773                          * No media, so set all parameters to 0 and set the media
774                          * change flag. The I2O device is still valid, just doesn't
775                          * have media, so we don't want to clear the controller or
776                          * device pointer.
777                          */
778                         case I2O_EVT_IND_BSA_VOLUME_UNLOAD:
779                         {
780                                 for(i = unit; i <= unit+15; i++)
781                                 {
782                                         i2ob_sizes[i] = 0;
783                                         i2ob_hardsizes[i] = 0;
784                                         i2ob_max_sectors[i] = 0;
785                                         i2ob[i].nr_sects = 0;
786                                         i2ob_gendisk.part[i].nr_sects = 0;
787                                 }
788                                 i2ob_media_change_flag[unit] = 1;
789                                 break;
790                         }
791
792                         case I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ:
793                                 printk(KERN_WARNING "%s: Attempt to eject locked media\n", 
794                                         i2ob_dev[unit].i2odev->dev_name);
795                                 break;
796
797                         /*
798                          * The capacity has changed and we are going to be
799                          * updating the max_sectors and other information 
800                          * about this disk.  We try a revalidate first. If
801                          * the block device is in use, we don't want to
802                          * do that as there may be I/Os bound for the disk
803                          * at the moment.  In that case we read the size 
804                          * from the device and update the information ourselves
805                          * and the user can later force a partition table
806                          * update through an ioctl.
807                          */
808                         case I2O_EVT_IND_BSA_CAPACITY_CHANGE:
809                         {
810                                 u64 size;
811
812                                 if(do_i2ob_revalidate(MKDEV(MAJOR_NR, unit),0) != -EBUSY)
813                                         continue;
814
815                                 if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 )
816                                         i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8);
817
818                                 spin_lock_irqsave(&io_request_lock, flags);     
819                                 i2ob_sizes[unit] = (int)(size>>10);
820                                 i2ob_gendisk.part[unit].nr_sects = size>>9;
821                                 i2ob[unit].nr_sects = (int)(size>>9);
822                                 spin_unlock_irqrestore(&io_request_lock, flags);        
823                                 break;
824                         }
825
826                         /* 
827                          * We got a SCSI SMART event, we just log the relevant
828                          * information and let the user decide what they want
829                          * to do with the information.
830                          */
831                         case I2O_EVT_IND_BSA_SCSI_SMART:
832                         {
833                                 char buf[17];
834                                 printk(KERN_INFO "I2O Block: %s received a SCSI SMART Event\n",i2ob_dev[unit].i2odev->dev_name);
835                                 memcpy(buf, evt_local->data, 16);
836                                 buf[16]='\0';   /* data[] is not NUL terminated */
837                                 printk(KERN_INFO "      Disk Serial#:%s\n",buf);
838                                 printk(KERN_INFO "      ASC 0x%02x \n",evt_local->ASC);
839                                 printk(KERN_INFO "      ASCQ 0x%02x \n",evt_local->ASCQ);
840                                 break;
841                         }
842                 
843                         /*
844                          *      Non event
845                          */
846                          
847                         case 0:
848                                 break;
849                                 
850                         /*
851                          * An event we didn't ask for.  Call the card manufacturer
852                          * and tell them to fix their firmware :)
853                          */
854                         default:
855                                 printk(KERN_INFO "%s: Received event %d we didn't register for\n"
856                                         KERN_INFO "   Blame the I2O card manufacturer 8)\n", 
857                                         i2ob_dev[unit].i2odev->dev_name, evt);
858                                 break;
859                 }
860         };
861
862         up_and_exit(&i2ob_thread_dead,0);
863         return 0;
864 }
865
866 /*
867  * The timer handler will attempt to restart requests 
868  * that are queued to the driver.  This handler
869  * currently only gets called if the controller
870  * had no more room in its inbound fifo.  
871  */
872
873 static void i2ob_timer_handler(unsigned long q)
874 {
875         unsigned long flags;
876
877         /*
878          * We cannot touch the request queue or the timer
879          * flag without holding the io_request_lock.
880          */
881         spin_lock_irqsave(&io_request_lock,flags);
882
883         /* 
884          * Clear the timer started flag so that 
885          * the timer can be queued again.
886          */
887         i2ob_timer_started = 0;
888
889         /* 
890          * Restart any requests.
891          */
892         i2ob_request((request_queue_t*)q);
893
894         /* 
895          * Free the lock.
896          */
897         spin_unlock_irqrestore(&io_request_lock,flags);
898 }
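/*
 * Sketch of the one-shot timer setup this handler relies on (the
 * actual initialisation lives outside this excerpt; assumed to follow
 * the standard 2.4 timer idiom):
 */
#if 0
	init_timer(&i2ob_timer);
	i2ob_timer.function = i2ob_timer_handler;
	i2ob_timer.data = 0;
#endif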
899
900 static int i2ob_backlog_request(struct i2o_controller *c, struct i2ob_device *dev)
901 {
902         u32 m;
903         struct i2ob_request *ireq;
904         
905         while((ireq=i2ob_backlog[c->unit])!=NULL)
906         {
907                 int unit;
908
909                 if(atomic_read(&i2ob_queues[c->unit]->queue_depth) > dev->depth/4)
910                         break;
911
912                 m = i2ob_get(dev);
913                 if(m == 0xFFFFFFFF)
914                         break;
915
916                 i2ob_backlog[c->unit] = ireq->next;
917                 if(i2ob_backlog[c->unit] == NULL)
918                         i2ob_backlog_tail[c->unit] = NULL;
919                         
920                 unit = MINOR(ireq->req->rq_dev);
921                 i2ob_send(m, dev, ireq, i2ob[unit].start_sect, unit);
922         }
923         if(i2ob_backlog[c->unit])
924                 return 1;
925         return 0;
926 }
927
928 /*
929  *      The I2O block driver is listed as one of those that pulls the
930  *      front entry off the queue before processing it. This is important
931  *      to remember here. If we drop the io lock then CURRENT will change
932  *      on us. We must unlink CURRENT in this routine before we return, if
933  *      we use it.
934  */
935
936 static void i2ob_request(request_queue_t *q)
937 {
938         struct request *req;
939         struct i2ob_request *ireq;
940         int unit;
941         struct i2ob_device *dev;
942         u32 m;
943         
944         
945         while (!list_empty(&q->queue_head)) {
946                 /*
947                  *      On an IRQ completion if there is an inactive
948                  *      request on the queue head it means it isn't yet
949                  *      ready to dispatch.
950                  */
951                 req = blkdev_entry_next_request(&q->queue_head);
952
953                 if(req->rq_status == RQ_INACTIVE)
954                         return;
955                         
956                 unit = MINOR(req->rq_dev);
957                 dev = &i2ob_dev[(unit&0xF0)];
958
959                 /* 
960                  *      Queue depths probably belong with some kind of 
961                  *      generic IOP commit control. It certainly isn't right
962                  *      that it's global!
963                  */
964                 if(atomic_read(&i2ob_queues[dev->unit]->queue_depth) >= dev->depth)
965                         break;
966                 
967                 /*
968                  *      Is the channel constipated ?
969                  */
970
971                 if(i2ob_backlog[dev->unit]!=NULL)
972                         break;
973                         
974                 /* Get a message */
975                 m = i2ob_get(dev);
976
977                 if(m==0xFFFFFFFF)
978                 {
979                         /* 
980                          * See if the timer has already been queued.
981                          */
982                         if (!i2ob_timer_started)
983                         {
984                                 DEBUG((KERN_ERR "i2ob: starting timer\n"));
985
986                                 /*
987                                  * Set the timer_started flag to ensure
988                                  * that the timer is only queued once.
989                                  * Queueing it more than once would corrupt
990                                  * the timer queue.
991                                  */
992                                 i2ob_timer_started = 1;
993
994                                 /* 
995                                  * Set up the timer to expire in
996                                  * 500ms.
997                                  */
998                                 i2ob_timer.expires = jiffies + (HZ >> 1);
999                                 i2ob_timer.data = (unsigned long)q;
1000
1001                                 /*
1002                                  * Start it.
1003                                  */
1004                                  
1005                                 add_timer(&i2ob_timer);
1006                                 return;
1007                         }
1008                 }
1009
1010                 /*
1011                  * Everything ok, so pull from kernel queue onto our queue
1012                  */
1013                 req->errors = 0;
1014                 blkdev_dequeue_request(req);    
1015                 req->waiting = NULL;
1016                 
1017                 ireq = i2ob_queues[dev->unit]->i2ob_qhead;
1018                 i2ob_queues[dev->unit]->i2ob_qhead = ireq->next;
1019                 ireq->req = req;
1020
1021                 i2ob_send(m, dev, ireq, i2ob[unit].start_sect, (unit&0xF0));
1022         }
1023 }
1024
1025
1026 /*
1027  *      SCSI-CAM for ioctl geometry mapping
1028  *      Duplicated with SCSI - this should be moved into somewhere common
1029  *      perhaps genhd ?
1030  *
1031  * LBA -> CHS mapping table taken from:
1032  *
1033  * "Incorporating the I2O Architecture into BIOS for Intel Architecture 
1034  *  Platforms" 
1035  *
1036  * This is an I2O document that is only available to I2O members,
1037  * not developers.
1038  *
1039  * From my understanding, this is how all the I2O cards do this
1040  *
1041  * Disk Size       | Sectors | Heads | Cylinders
1042  * ----------------+---------+-------+-------------------
1043  * 1 < X <= 528M   | 63      | 16    | X/(63 * 16 * 512)
1044  * 528M < X <= 1G  | 63      | 32    | X/(63 * 32 * 512)
1045  * 1G < X <= 21G   | 63      | 64    | X/(63 * 64 * 512)
1046  * 21G < X <= 42G  | 63      | 128   | X/(63 * 128 * 512)
1047  * 42G < X         | 63      | 255   | X/(63 * 255 * 512)
1048  */
1049 #define BLOCK_SIZE_528M         1081344
1050 #define BLOCK_SIZE_1G           2097152
1051 #define BLOCK_SIZE_21G          4403200
1052 #define BLOCK_SIZE_42G          8806400
1053 #define BLOCK_SIZE_84G          17612800
1054
1055 static void i2o_block_biosparam(
1056         unsigned long capacity,
1057         unsigned short *cyls,
1058         unsigned char *hds,
1059         unsigned char *secs) 
1060 {
1061         unsigned long heads, sectors, cylinders; 
1062
1063         sectors = 63L;                          /* Maximize sectors per track */ 
1064         if(capacity <= BLOCK_SIZE_528M)
1065                 heads = 16;
1066         else if(capacity <= BLOCK_SIZE_1G)
1067                 heads = 32;
1068         else if(capacity <= BLOCK_SIZE_21G)
1069                 heads = 64;
1070         else if(capacity <= BLOCK_SIZE_42G)
1071                 heads = 128;
1072         else
1073                 heads = 255;
1074
1075         cylinders = capacity / (heads * sectors);
1076
1077         *cyls = (unsigned short) cylinders;     /* Stuff return values */ 
1078         *secs = (unsigned char) sectors; 
1079         *hds  = (unsigned char) heads; 
1080 }
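/*
 * Worked example: a 1Gb volume is 2097152 sectors, i.e. exactly
 * BLOCK_SIZE_1G, so heads = 32, sectors = 63 and
 * cylinders = 2097152 / (32 * 63) = 1040 (integer division).
 */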
1081
1082
1083 /*
1084  *      Rescan the partition tables
1085  */
1086  
1087 static int do_i2ob_revalidate(kdev_t dev, int maxu)
1088 {
1089         int minor=MINOR(dev);
1090         int i;
1091         
1092         minor&=0xF0;
1093
1094         i2ob_dev[minor].refcnt++;
1095         if(i2ob_dev[minor].refcnt>maxu+1)
1096         {
1097                 i2ob_dev[minor].refcnt--;
1098                 return -EBUSY;
1099         }
1100         
1101         for( i = 15; i>=0 ; i--)
1102         {
1103                 int m = minor+i;
1104                 invalidate_device(MKDEV(MAJOR_NR, m), 1);
1105                 i2ob_gendisk.part[m].start_sect = 0;
1106                 i2ob_gendisk.part[m].nr_sects = 0;
1107         }
1108
1109         /*
1110          *      Do a physical check and then reconfigure
1111          */
1112          
1113         i2ob_install_device(i2ob_dev[minor].controller, i2ob_dev[minor].i2odev,
1114                 minor);
1115         i2ob_dev[minor].refcnt--;
1116         return 0;
1117 }
1118
1119 /*
1120  *      Issue device specific ioctl calls.
1121  */
1122
1123 static int i2ob_ioctl(struct inode *inode, struct file *file,
1124                      unsigned int cmd, unsigned long arg)
1125 {
1126         struct i2ob_device *dev;
1127         int minor;
1128
1129         /* Anyone capable of this syscall can do *real bad* things */
1130
1131         if (!capable(CAP_SYS_ADMIN))
1132                 return -EPERM;
1133         if (!inode)
1134                 return -EINVAL;
1135         minor = MINOR(inode->i_rdev);
1136         if (minor >= (MAX_I2OB<<4))
1137                 return -ENODEV;
1138
1139         dev = &i2ob_dev[minor];
1140         switch (cmd) {
1141                 case BLKGETSIZE:
1142                         return put_user(i2ob[minor].nr_sects, (long *) arg);
1143
1144                 case HDIO_GETGEO:
1145                 {
1146                         struct hd_geometry g;
1147                         int u=minor&0xF0;
1148                         i2o_block_biosparam(i2ob_sizes[u]<<1, 
1149                                 &g.cylinders, &g.heads, &g.sectors);
1150                         g.start = i2ob[minor].start_sect;
1151                         return copy_to_user((void *)arg,&g, sizeof(g))?-EFAULT:0;
1152                 }
1153         
1154                 case BLKRRPART:
1155                         if(!capable(CAP_SYS_ADMIN))
1156                                 return -EACCES;
1157                         return do_i2ob_revalidate(inode->i_rdev,1);
1158                         
1159                 case BLKFLSBUF:
1160                 case BLKROSET:
1161                 case BLKROGET:
1162                 case BLKRASET:
1163                 case BLKRAGET:
1164                 case BLKPG:
1165                         return blk_ioctl(inode->i_rdev, cmd, arg);
1166                         
1167                 default:
1168                         return -EINVAL;
1169         }
1170 }
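/*
 * A user-space sketch of the HDIO_GETGEO path above (hypothetical
 * test program, using the /dev/i2o/hda naming from the header
 * comment):
 */
#if 0
#include <stdio.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/hdreg.h>

int main(void)
{
	struct hd_geometry g;
	int fd = open("/dev/i2o/hda", O_RDONLY);

	if (fd < 0 || ioctl(fd, HDIO_GETGEO, &g) < 0)
		return 1;
	printf("%u cylinders, %u heads, %u sectors, start %lu\n",
		g.cylinders, g.heads, g.sectors, g.start);
	return 0;
}
#endif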
1171
1172 /*
1173  *      Close the block device down
1174  */
1175  
1176 static int i2ob_release(struct inode *inode, struct file *file)
1177 {
1178         struct i2ob_device *dev;
1179         int minor;
1180
1181         minor = MINOR(inode->i_rdev);
1182         if (minor >= (MAX_I2OB<<4))
1183                 return -ENODEV;
1184         dev = &i2ob_dev[(minor&0xF0)];
1185
1186         /*
1187          * This is to deal with the case of an application
1188          * opening a device, the device disappearing while it's
1189          * in use, and the application then trying to release
1190          * it.  e.g.: Unmounting a deleted RAID volume at reboot. 
1191          * If we send messages, it will just cause FAILs since
1192          * the TID no longer exists.
1193          */
1194         if(!dev->i2odev)
1195                 return 0;
1196
1197         /* Sync the device so we don't get errors */
1198         fsync_dev(inode->i_rdev);
1199
1200         if (dev->refcnt <= 0)
1201                 printk(KERN_ALERT "i2ob_release: refcount(%d) <= 0\n", dev->refcnt);
1202         dev->refcnt--;
1203         if(dev->refcnt==0)
1204         {
1205                 /*
1206                  *      Flush the onboard cache on unmount
1207                  */
1208                 u32 msg[5];
1209                 int *query_done = &dev->done_flag;
1210                 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1211                 msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
1212                 msg[2] = i2ob_context|0x40000000;
1213                 msg[3] = (u32)query_done;
1214                 msg[4] = 60<<16;
1215                 DEBUG("Flushing...");
1216                 i2o_post_wait(dev->controller, msg, 20, 60);
1217
1218                 /*
1219                  *      Unlock the media
1220                  */
1221                 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1222                 msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
1223                 msg[2] = i2ob_context|0x40000000;
1224                 msg[3] = (u32)query_done;
1225                 msg[4] = -1;
1226                 DEBUG("Unlocking...");
1227                 i2o_post_wait(dev->controller, msg, 20, 2);
1228                 DEBUG("Unlocked.\n");
1229         
1230                 /*
1231                  * Now unclaim the device.
1232                  */
1233
1234                 if (i2o_release_device(dev->i2odev, &i2o_block_handler))
1235                         printk(KERN_ERR "i2ob_release: controller rejected unclaim.\n");
1236                 
1237                 DEBUG("Unclaim\n");
1238         }
1239         MOD_DEC_USE_COUNT;
1240         return 0;
1241 }
1242
1243 /*
1244  *      Open the block device.
1245  */
1246  
1247 static int i2ob_open(struct inode *inode, struct file *file)
1248 {
1249         int minor;
1250         struct i2ob_device *dev;
1251         
1252         if (!inode)
1253                 return -EINVAL;
1254         minor = MINOR(inode->i_rdev);
1255         if (minor >= MAX_I2OB<<4)
1256                 return -ENODEV;
1257         dev=&i2ob_dev[(minor&0xF0)];
1258
1259         if(!dev->i2odev)        
1260                 return -ENODEV;
1261         
1262         if(dev->refcnt++==0)
1263         { 
1264                 u32 msg[6];
1265                 
1266                 DEBUG("Claim ");
1267                 if(i2o_claim_device(dev->i2odev, &i2o_block_handler))
1268                 {
1269                         dev->refcnt--;
1270                         printk(KERN_INFO "I2O Block: Could not open device\n");
1271                         return -EBUSY;
1272                 }
1273                 DEBUG("Claimed ");
1274                 
1275                 /*
1276                  *      Mount the media if needed. Note that we don't use
1277                  *      the lock bit. Since we have to issue a lock if it
1278                  *      refuses a mount (quite possible) then we might as
1279                  *      well just send two messages out.
1280                  */
1281                 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;               
1282                 msg[1] = I2O_CMD_BLOCK_MMOUNT<<24|HOST_TID<<12|dev->tid;
1283                 msg[4] = -1;
1284                 msg[5] = 0;
1285                 DEBUG("Mount ");
1286                 i2o_post_wait(dev->controller, msg, 24, 2);
1287
1288                 /*
1289                  *      Lock the media
1290                  */
1291                 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1292                 msg[1] = I2O_CMD_BLOCK_MLOCK<<24|HOST_TID<<12|dev->tid;
1293                 msg[4] = -1;
1294                 DEBUG("Lock ");
1295                 i2o_post_wait(dev->controller, msg, 20, 2);
1296                 DEBUG("Ready.\n");
1297         }               
1298         MOD_INC_USE_COUNT;
1299         return 0;
1300 }
1301
1302 /*
1303  *      Issue a device query
1304  */
1305  
1306 static int i2ob_query_device(struct i2ob_device *dev, int table, 
1307         int field, void *buf, int buflen)
1308 {
1309         return i2o_query_scalar(dev->controller, dev->tid,
1310                 table, field, buf, buflen);
1311 }
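/*
 * Typical use, per the callers below: group 0x0004 field 0 is the
 * current media capacity and group 0x0000 field 4 the device
 * capacity, both returned as 8-byte values:
 *
 *	u64 size;
 *	if(i2ob_query_device(dev, 0x0004, 0, &size, 8) != 0)
 *		i2ob_query_device(dev, 0x0000, 4, &size, 8);
 */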
1312
1313
1314 /*
1315  *      Install the I2O block device we found.
1316  */
1317  
1318 static int i2ob_install_device(struct i2o_controller *c, struct i2o_device *d, int unit)
1319 {
1320         u64 size;
1321         u32 blocksize;
1322         u32 limit;
1323         u8 type;
1324         u32 flags, status;
1325         struct i2ob_device *dev=&i2ob_dev[unit];
1326         int i;
1327
1328         /*
1329          * For logging purposes...
1330          */
1331         printk(KERN_INFO "i2ob: Installing tid %d device at unit %d\n", 
1332                         d->lct_data.tid, unit); 
1333
1334         /*
1335          *      Ask for the current media data. If that isn't supported
1336          *      then we ask for the device capacity data
1337          */
1338         if(i2ob_query_device(dev, 0x0004, 1, &blocksize, 4) != 0
1339           || i2ob_query_device(dev, 0x0004, 0, &size, 8) !=0 )
1340         {
1341                 i2ob_query_device(dev, 0x0000, 3, &blocksize, 4);
1342                 i2ob_query_device(dev, 0x0000, 4, &size, 8);
1343         }
1344         
1345         i2ob_query_device(dev, 0x0000, 5, &flags, 4);
1346         i2ob_query_device(dev, 0x0000, 6, &status, 4);
1347         i2ob_sizes[unit] = (int)(size>>10);
1348         for(i=unit; i <= unit+15 ; i++)
1349                 i2ob_hardsizes[i] = blocksize;
1350         i2ob_gendisk.part[unit].nr_sects = size>>9;
1351         i2ob[unit].nr_sects = (int)(size>>9);
1352
1353         /* Set limit based on inbound frame size */
1354         limit = (d->controller->status_block->inbound_frame_size - 8)/2;
1355         limit = limit<<9;
1356
1357         /*
1358          * Max number of Scatter-Gather Elements
1359          */     
1360
1361         for(i=unit;i<=unit+15;i++)
1362         {
1363                 if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy)
1364                 {
1365                         i2ob_max_sectors[i] = 32;
1366                         i2ob_dev[i].max_segments = 8;
1367                         i2ob_dev[i].depth = 4;
1368                 }
1369                 else if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.short_req)
1370                 {
1371                         i2ob_max_sectors[i] = 8;
1372                         i2ob_dev[i].max_segments = 8;
1373                 }
1374                 else
1375                 {
1376                         /* MAX_SECTORS was used but 255 is a dumb number for
1377                            striped RAID */
1378                         i2ob_max_sectors[i]=256;
1379                         i2ob_dev[i].max_segments = (d->controller->status_block->inbound_frame_size - 8)/2;
1380                 }
1381         }
1382
1383         printk(KERN_INFO "Max segments set to %d\n", 
1384                                 i2ob_dev[unit].max_segments);
1385         printk(KERN_INFO "Byte limit is %d.\n", limit);
1386
1387         i2ob_query_device(dev, 0x0000, 0, &type, 1);
1388         
1389         sprintf(d->dev_name, "%s%c", i2ob_gendisk.major_name, 'a' + (unit>>4));
1390
1391         printk(KERN_INFO "%s: ", d->dev_name);
1392         switch(type)
1393         {
1394                 case 0: printk("Disk Storage");break;
1395                 case 4: printk("WORM");break;
1396                 case 5: printk("CD-ROM");break;
1397                 case 7: printk("Optical device");break;
1398                 default:
1399                         printk("Type %d", type);
1400         }
1401         if(status&(1<<10))
1402                 printk("(RAID)");
1403         if(((flags & (1<<3)) && !(status & (1<<3))) ||
1404            ((flags & (1<<4)) && !(status & (1<<4))))
1405         {
1406                 printk(KERN_INFO " Not loaded.\n");
1407                 return 1;
1408         }
1409         printk("- %dMb, %d byte sectors",
1410                 (int)(size>>20), blocksize);
1411         if(status&(1<<0))
1412         {
1413                 u32 cachesize;
1414                 i2ob_query_device(dev, 0x0003, 0, &cachesize, 4);
1415                 cachesize>>=10;
1416                 if(cachesize>4095)
1417                         printk(", %dMb cache", cachesize>>10);
1418                 else
1419                         printk(", %dKb cache", cachesize);
1420                 
1421         }
1422         printk(".\n");
1423         printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n", 
1424                 d->dev_name, i2ob_max_sectors[unit]);
1425
1426         /* 
1427          * If this is the first I2O block device found on this IOP,
1428          * we need to initialize all the queue data structures
1429          * before any I/O can be performed. If it fails, this
1430          * device is useless.
1431          */
1432         if(!i2ob_queues[c->unit]) {
1433                 if(i2ob_init_iop(c->unit))
1434                         return 1;
1435         }
1436
1437         /* 
1438          * This will save one level of lookup/indirection in critical 
1439          * code so that we can directly get the queue ptr from the
1440          * device instead of having to go the IOP data structure.
1441          */
1442         dev->req_queue = &i2ob_queues[c->unit]->req_queue;
1443
1444         grok_partitions(&i2ob_gendisk, unit>>4, 1<<4, (long)(size>>9));
1445
1446         /*
1447          * Register for the events we're interested in and that the
1448          * device actually supports.
1449          */
1450         i2o_event_register(c, d->lct_data.tid, i2ob_context, unit, 
1451                 (I2OB_EVENT_MASK & d->lct_data.event_capabilities));
1452
1453         return 0;
1454 }
1455
1456 /*
1457  * Initialize IOP specific queue structures.  This is called
1458  * once for each IOP that has a block device sitting behind it.
1459  */
1460 static int i2ob_init_iop(unsigned int unit)
1461 {
1462         int i;
1463
1464         i2ob_queues[unit] = (struct i2ob_iop_queue*)
1465                 kmalloc(sizeof(struct i2ob_iop_queue), GFP_ATOMIC);
1466         if(!i2ob_queues[unit])
1467         {
1468                 printk(KERN_WARNING
1469                         "Could not allocate request queue for I2O block device!\n");
1470                 return -1;
1471         }
1472
1473         for(i = 0; i< MAX_I2OB_DEPTH; i++)
1474         {
1475                 i2ob_queues[unit]->request_queue[i].next = 
1476                         &i2ob_queues[unit]->request_queue[i+1];
1477                 i2ob_queues[unit]->request_queue[i].num = i;
1478         }
1479         
1480         /* Queue is MAX_I2OB_DEPTH + 1; terminate the free list */
1481         i2ob_queues[unit]->request_queue[i].next = NULL;
1482         i2ob_queues[unit]->i2ob_qhead = &i2ob_queues[unit]->request_queue[0];
1483         atomic_set(&i2ob_queues[unit]->queue_depth, 0);
1484
1485         blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request);
1486         blk_queue_headactive(&i2ob_queues[unit]->req_queue, 0);
1487         i2ob_queues[unit]->req_queue.back_merge_fn = i2ob_back_merge;
1488         i2ob_queues[unit]->req_queue.front_merge_fn = i2ob_front_merge;
1489         i2ob_queues[unit]->req_queue.merge_requests_fn = i2ob_merge_requests;
1490         i2ob_queues[unit]->req_queue.queuedata = &i2ob_queues[unit];
1491
1492         return 0;
1493 }
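
/*
 * Editorial sketch (hypothetical helpers, not in this driver; they assume
 * the i2ob_request/i2ob_iop_queue layout implied by i2ob_init_iop() above):
 * the loop above threads request_queue[] into a singly linked free list
 * anchored at i2ob_qhead, so grabbing and releasing a request slot are
 * O(1) operations on the list head.
 */
static inline struct i2ob_request *i2ob_sketch_get(struct i2ob_iop_queue *q)
{
        struct i2ob_request *ireq = q->i2ob_qhead;      /* pop the free-list head */

        if (ireq != NULL) {
                q->i2ob_qhead = ireq->next;
                atomic_inc(&q->queue_depth);
        }
        return ireq;
}

static inline void i2ob_sketch_put(struct i2ob_iop_queue *q,
                                   struct i2ob_request *ireq)
{
        ireq->next = q->i2ob_qhead;                     /* push back on the head */
        q->i2ob_qhead = ireq;
        atomic_dec(&q->queue_depth);
}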
1494
1495 /*
1496  * Get the request queue for the given device.
1497  */     
1498 static request_queue_t* i2ob_get_queue(kdev_t dev)
1499 {
1500         int unit = MINOR(dev)&0xF0;
1501
1502         return i2ob_dev[unit].req_queue;
1503 }
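
/*
 * Editorial sketch (hypothetical helpers, not in the driver): the minor
 * number packs the drive in the top four bits and the partition in the
 * low four, which is why i2ob_get_queue() masks with 0xF0 and the scan
 * code advances units in steps of 16.
 */
static inline int i2ob_sketch_unit(kdev_t dev)
{
        return MINOR(dev) & 0xF0;       /* e.g. minor 0x23 -> unit 0x20 (hdc) */
}

static inline int i2ob_sketch_partition(kdev_t dev)
{
        return MINOR(dev) & 0x0F;       /* e.g. minor 0x23 -> partition 3 */
}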
1504
1505 /*
1506  * Probe the I2O subsystem for block class devices
1507  */
1508 static void i2ob_scan(int bios)
1509 {
1510         int i;
1511         int warned = 0;
1512
1513         struct i2o_device *d, *b=NULL;
1514         struct i2o_controller *c;
1515         struct i2ob_device *dev;
1516                 
1517         for(i=0; i< MAX_I2O_CONTROLLERS; i++)
1518         {
1519                 c=i2o_find_controller(i);
1520         
1521                 if(c==NULL)
1522                         continue;
1523
1524                 /*
1525                  * The device list connected to the I2O controller is doubly
1526                  * linked. Here we traverse to the end of the list and start
1527                  * claiming devices from that end. This ensures that, within an
1528                  * I2O controller at least, newly created volumes are claimed
1529                  * after the older ones and thus map to the same major/minor
1530                  * (and hence device file name) after every reboot; see the
1531                  * i2ob_list_tail() sketch after this function. The exceptions:
1532                  * 1. A TID was reused.
1533                  * 2. There is more than one I2O controller.
1534                  */
1535 
1536                 if(!bios)
1537                 {
1538                         for (d=c->devices;d!=NULL;d=d->next)
1539                                 if(d->next == NULL)
1540                                         b = d;
1541                 }
1542                 else
1543                         b = c->devices;
1544 
1545                 while(b != NULL)
1546                 {
1547                         d=b;
1548                         if(bios)
1549                                 b = b->next;
1550                         else
1551                                 b = b->prev;
1552 
1553                         if(d->lct_data.class_id!=I2O_CLASS_RANDOM_BLOCK_STORAGE)
1554                                 continue;
1555
1556                         if(d->lct_data.user_tid != 0xFFF)
1557                                 continue;
1558
1559                         if(bios)
1560                         {
1561                                 if(d->lct_data.bios_info != 0x80)
1562                                         continue;
1563                                 printk(KERN_INFO "Claiming as Boot device: Controller %d, TID %d\n", c->unit, d->lct_data.tid);
1564                         }
1565                         else
1566                         {
1567                                 if(d->lct_data.bios_info == 0x80)
1568                                         continue; /*Already claimed on pass 1 */
1569                         }
1570
1571                         if(i2o_claim_device(d, &i2o_block_handler))
1572                         {
1573                                 printk(KERN_WARNING "i2o_block: Controller %d, TID %d\n", c->unit,
1574                                         d->lct_data.tid);
1575                                 printk(KERN_WARNING "\t%sevice refused claim! Skipping installation\n", bios?"Boot d":"D");
1576                                 continue;
1577                         }
1578
1579                         if(scan_unit<MAX_I2OB<<4)
1580                         {
1581                                 /*
1582                                  * Get the device and fill in the
1583                                  * Tid and controller.
1584                                  */
1585                                 dev=&i2ob_dev[scan_unit];
1586                                 dev->i2odev = d; 
1587                                 dev->controller = c;
1588                                 dev->unit = c->unit;
1589                                 dev->tid = d->lct_data.tid;
1590
1591                                 if(i2ob_install_device(c,d,scan_unit))
1592                                         printk(KERN_WARNING "Could not install I2O block device\n");
1593                                 else
1594                                 {
1595                                         scan_unit+=16;
1596                                         i2ob_dev_count++;
1597
1598                                         /* We want to know when device goes away */
1599                                         i2o_device_notify_on(d, &i2o_block_handler);
1600                                 }
1601                         }
1602                         else
1603                         {
1604                                 if(!warned++)
1605                                         printk(KERN_WARNING "i2o_block: too many devices, registering only %d.\n", scan_unit>>4);
1606                         }
1607                         i2o_release_device(d, &i2o_block_handler);
1608                 }
1609                 i2o_unlock_controller(c);
1610         }
1611 }
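
/*
 * Editorial sketch (hypothetical helper, not in the driver): the non-BIOS
 * pass of i2ob_scan() above walks to the tail of the controller's doubly
 * linked device list and then claims devices back to front, so older
 * volumes keep their minor numbers across reboots.
 */
static inline struct i2o_device *i2ob_list_tail(struct i2o_controller *c)
{
        struct i2o_device *d, *tail = NULL;

        for (d = c->devices; d != NULL; d = d->next)
                tail = d;               /* last node visited is the tail */
        return tail;
}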
1612
1613 static void i2ob_probe(void)
1614 {
1615         /*
1616          *      Some overhead/redundancy is involved here in trying to
1617          *      claim the first boot volume encountered as /dev/i2o/hda
1618          *      every time. All the I2O controllers are searched and the
1619          *      first I2O block device marked as bootable is claimed.
1620          *      If an I2O block device was booted from, the BIOS sets
1621          *      its bios_info field to 0x80; this is what we search for.
1622          *      Ensuring that the bootable volume is always /dev/i2o/hda
1623          *      prevents a kernel panic while mounting the root
1624          *      partition.
1625          */
1626
1627         printk(KERN_INFO "i2o_block: Checking for Boot device...\n");
1628         i2ob_scan(1);
1629
1630         /*
1631          *      Now the remainder.
1632          */
1633         printk(KERN_INFO "i2o_block: Checking for I2O Block devices...\n");
1634         i2ob_scan(0);
1635 }
1636
1637
1638 /*
1639  * New device notification handler.  Called whenever a new
1640  * I2O block storage device is added to the system.
1641  * 
1642  * Should we spin lock around this to keep multiple devs from 
1643  * getting updated at the same time? 
1644  * 
1645  */
1646 void i2ob_new_device(struct i2o_controller *c, struct i2o_device *d)
1647 {
1648         struct i2ob_device *dev;
1649         int unit = 0;
1650
1651         printk(KERN_INFO "i2o_block: New device detected\n");
1652         printk(KERN_INFO "   Controller %d Tid %d\n",c->unit, d->lct_data.tid);
1653
1654         /* Check for available space */
1655         if(i2ob_dev_count >= MAX_I2OB)  /* dev_count counts disks, not minors */
1656         {
1657                 printk(KERN_ERR "i2o_block: No more devices allowed!\n");
1658                 return;
1659         }
1660         for(unit = 0; unit < (MAX_I2OB<<4); unit += 16)
1661         {
1662                 if(!i2ob_dev[unit].i2odev)
1663                         break;
1664         }
1665
1666         if(i2o_claim_device(d, &i2o_block_handler))
1667         {
1668                 printk(KERN_INFO 
1669                         "i2o_block: Unable to claim device. Installation aborted\n");
1670                 return;
1671         }
1672
1673         dev = &i2ob_dev[unit];
1674         dev->i2odev = d; 
1675         dev->controller = c;
1676         dev->tid = d->lct_data.tid;
1677
1678         if(i2ob_install_device(c,d,unit))
1679                 printk(KERN_ERR "i2o_block: Could not install new device\n");
1680         else    
1681         {
1682                 i2ob_dev_count++;
1683                 i2o_device_notify_on(d, &i2o_block_handler);
1684         }
1685
1686         i2o_release_device(d, &i2o_block_handler);
1687  
1688         return;
1689 }
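
/*
 * Editorial sketch (hypothetical helper, not in the driver): the slot
 * search in i2ob_new_device() above walks the device table in strides of
 * 16 minors; a negative return here would mean every slot is taken.
 */
static inline int i2ob_sketch_free_unit(void)
{
        int unit;

        for (unit = 0; unit < (MAX_I2OB << 4); unit += 16)
                if (i2ob_dev[unit].i2odev == NULL)
                        return unit;
        return -1;
}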
1690
1691 /*
1692  * Deleted device notification handler.  Called when a device we
1693  * are talking to has been deleted by the user or some other
1694  * mysterious force outside the kernel.
1695  */
1696 void i2ob_del_device(struct i2o_controller *c, struct i2o_device *d)
1697 {       
1698         int unit = 0;
1699         int i = 0;
1700         unsigned long flags;
1701
1702         spin_lock_irqsave(&io_request_lock, flags);
1703
1704         /*
1705          * Need to do this...we sometimes get two events from the IRTOS
1706          * in a row and that causes lots of problems.
1707          */
1708         i2o_device_notify_off(d, &i2o_block_handler);
1709
1710         printk(KERN_INFO "I2O Block Device Deleted\n");
1711
1712         for(unit = 0; unit < MAX_I2OB<<4; unit += 16)
1713         {
1714                 if(i2ob_dev[unit].i2odev == d)
1715                 {
1716                         printk(KERN_INFO "  /dev/%s: Controller %d Tid %d\n", 
1717                                 d->dev_name, c->unit, d->lct_data.tid);
1718                         break;
1719                 }
1720         }
1721         if(unit >= MAX_I2OB<<4)
1722         {
1723                 printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n");
1724                 spin_unlock_irqrestore(&io_request_lock, flags);
1725                 return;
1726         }
1727
1728         /* 
1729          * This will force errors when i2ob_get_queue() is called
1730          * by the kernel.
1731          */
1732         i2ob_dev[unit].req_queue = NULL;
1733         for(i = unit; i <= unit+15; i++)
1734         {
1735                 i2ob_dev[i].i2odev = NULL;
1736                 i2ob_sizes[i] = 0;
1737                 i2ob_hardsizes[i] = 0;
1738                 i2ob_max_sectors[i] = 0;
1739                 i2ob[i].nr_sects = 0;
1740                 i2ob_gendisk.part[i].nr_sects = 0;
1741         }
1742         spin_unlock_irqrestore(&io_request_lock, flags);
1743
1744         /*
1745          * Sync the device...this will force all outstanding I/Os
1746          * to attempt to complete, thus causing error messages.
1747          * We have to do this as the user could immediately create
1748          * a new volume that gets assigned the same minor number.
1749          * If there are still outstanding writes to the device,
1750          * that could cause data corruption on the new volume!
1751          *
1752          * The truth is that deleting a volume that you are currently
1753          * accessing will do _bad things_ to your system.  This 
1754          * handler will keep it from crashing, but most probably
1755          * you'll have to reboot to get the system running
1756          * properly.  Deleting disks you are using is dumb.
1757          * Unmount them first and all will be good!
1758          *
1759          * It's not this driver's job to protect the system from
1760          * dumb user mistakes :)
1761          */
1762         if(i2ob_dev[unit].refcnt)
1763                 fsync_dev(MKDEV(MAJOR_NR,unit));
1764
1765         /*
1766          * Decrease usage count for module
1767          */     
1768         while(i2ob_dev[unit].refcnt--)
1769                 MOD_DEC_USE_COUNT;
1770
1771         i2ob_dev[unit].refcnt = 0;
1772         
1773         i2ob_dev[unit].tid = 0;
1774
1775         /* 
1776          * Do we need this?
1777          * The media didn't really change...the device is just gone
1778          */
1779         i2ob_media_change_flag[unit] = 1;
1780
1781         i2ob_dev_count--;       
1782 }
1783
1784 /*
1785  *      Have we seen a media change ?
1786  */
1787 static int i2ob_media_change(kdev_t dev)
1788 {
1789         int i=MINOR(dev);
1790         i>>=4;
1791         if(i2ob_media_change_flag[i])
1792         {
1793                 i2ob_media_change_flag[i]=0;
1794                 return 1;
1795         }
1796         return 0;
1797 }
1798
1799 static int i2ob_revalidate(kdev_t dev)
1800 {
1801         return do_i2ob_revalidate(dev, 0);
1802 }
1803
1804 /*
1805  * Reboot notifier.  This is called by i2o_core when the system
1806  * shuts down.
1807  */
1808 static void i2ob_reboot_event(void)
1809 {
1810         int i;
1811         
1812         for(i=0;i<MAX_I2OB;i++)
1813         {
1814                 struct i2ob_device *dev=&i2ob_dev[(i<<4)];
1815                 
1816                 if(dev->refcnt!=0)
1817                 {
1818                         /*
1819                          *      Flush the onboard cache
1820                          */
1821                         u32 msg[5];
1822                         int *query_done = &dev->done_flag;
1823                         msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1824                         msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
1825                         msg[2] = i2ob_context|0x40000000;
1826                         msg[3] = (u32)query_done;
1827                         msg[4] = 60<<16;
1828                         
1829                         DEBUG("Flushing...");
1830                         i2o_post_wait(dev->controller, msg, 20, 60);
1831
1832                         DEBUG("Unlocking...");
1833                         /*
1834                          *      Unlock the media
1835                          */
1836                         msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1837                         msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
1838                         msg[2] = i2ob_context|0x40000000;
1839                         msg[3] = (u32)query_done;
1840                         msg[4] = -1;
1841                         i2o_post_wait(dev->controller, msg, 20, 2);
1842                         
1843                         DEBUG("Unlocked.\n");
1844                 }
1845         }       
1846 }
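
/*
 * Editorial sketch (hypothetical helper, not in the driver): the CFLUSH
 * and MUNLOCK requests in i2ob_reboot_event() above build the same
 * five-word message; only the command code and the final argument word
 * differ.  The pointer cast mirrors the original code and assumes the
 * address fits in 32 bits.
 */
static inline void i2ob_sketch_build_msg(u32 *msg, u32 cmd,
                                         struct i2ob_device *dev, u32 arg)
{
        msg[0] = FIVE_WORD_MSG_SIZE | SGL_OFFSET_0;
        msg[1] = cmd << 24 | HOST_TID << 12 | dev->tid;
        msg[2] = i2ob_context | 0x40000000;     /* flag reply for post_wait */
        msg[3] = (u32)&dev->done_flag;
        msg[4] = arg;                           /* e.g. 60<<16 for a 60s flush */
}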
1847
1848 static struct block_device_operations i2ob_fops =
1849 {
1850         open:                   i2ob_open,
1851         release:                i2ob_release,
1852         ioctl:                  i2ob_ioctl,
1853         check_media_change:     i2ob_media_change,
1854         revalidate:             i2ob_revalidate,
1855 };
1856
1857 static struct gendisk i2ob_gendisk = 
1858 {
1859         major:          MAJOR_NR,
1860         major_name:     "i2o/hd",
1861         minor_shift:    4,
1862         max_p:          1<<4,
1863         part:           i2ob,
1864         sizes:          i2ob_sizes,
1865         nr_real:        MAX_I2OB,
1866         real_devices:   NULL,
1867         next:           NULL,
1868         fops:           &i2ob_fops,
1869 };
1870
1871
1872 /*
1873  * And here should be modules and kernel interface 
1874  *  (Just smiley confuses emacs :-)
1875  */
1876
1877 #ifdef MODULE
1878 #define i2o_block_init init_module
1879 #endif
1880
1881 int i2o_block_init(void)
1882 {
1883         int i;
1884
1885         printk(KERN_INFO "I2O Block Storage OSM v0.9\n");
1886         printk(KERN_INFO "   (c) Copyright 1999-2001 Red Hat Software.\n");
1887         
1888         /*
1889          *      Register the block device interfaces
1890          */
1891
1892         if (register_blkdev(MAJOR_NR, "i2o_block", &i2ob_fops)) {
1893                 printk(KERN_ERR "Unable to get major number %d for i2o_block\n",
1894                        MAJOR_NR);
1895                 return -EIO;
1896         }
1897 #ifdef MODULE
1898         printk(KERN_INFO "i2o_block: registered device at major %d\n", MAJOR_NR);
1899 #endif
1900
1901         /*
1902          *      Now fill in the boilerplate
1903          */
1904          
1905         blksize_size[MAJOR_NR] = i2ob_blksizes;
1906         hardsect_size[MAJOR_NR] = i2ob_hardsizes;
1907         blk_size[MAJOR_NR] = i2ob_sizes;
1908         max_sectors[MAJOR_NR] = i2ob_max_sectors;
1909         blk_dev[MAJOR_NR].queue = i2ob_get_queue;
1910         
1911         blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request);
1912         blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0);
1913
1914         for (i = 0; i < MAX_I2OB << 4; i++) {
1915                 i2ob_dev[i].refcnt = 0;
1916                 i2ob_dev[i].flags = 0;
1917                 i2ob_dev[i].controller = NULL;
1918                 i2ob_dev[i].i2odev = NULL;
1919                 i2ob_dev[i].tid = 0;
1920                 i2ob_dev[i].head = NULL;
1921                 i2ob_dev[i].tail = NULL;
1922                 i2ob_dev[i].depth = MAX_I2OB_DEPTH;
1923                 i2ob_blksizes[i] = 1024;
1924                 i2ob_max_sectors[i] = 2;
1925         }
1926         
1927         /*
1928          *      Set up the queue
1929          */
1930         for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
1931         {
1932                 i2ob_queues[i] = NULL;
1933         }
1934
1935         /*
1936          *      Timers
1937          */
1938          
1939         init_timer(&i2ob_timer);
1940         i2ob_timer.function = i2ob_timer_handler;
1941         i2ob_timer.data = 0;
1942         
1943         /*
1944          *      Register the OSM handler as we will need this to probe for
1945          *      drives, geometry and other goodies.
1946          */
1947
1948         if(i2o_install_handler(&i2o_block_handler)<0)
1949         {
1950                 unregister_blkdev(MAJOR_NR, "i2o_block");
1951                 blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
1952                 printk(KERN_ERR "i2o_block: unable to register OSM.\n");
1953                 return -EINVAL;
1954         }
1955         i2ob_context = i2o_block_handler.context;        
1956
1957         /*
1958          * Initialize event handling thread
1959          */
1960         init_MUTEX_LOCKED(&i2ob_evt_sem);
1961         evt_pid = kernel_thread(i2ob_evt, NULL, CLONE_SIGHAND);
1962         if(evt_pid < 0)
1963         {
1964                 printk(KERN_ERR "i2o_block: Could not initialize event thread.  Aborting\n");
1965                 i2o_remove_handler(&i2o_block_handler);
1966                 unregister_blkdev(MAJOR_NR, "i2o_block");
1967                 return evt_pid;
1968         }
1969
1970         /*
1971          *      Finally see what is actually plugged in to our controllers
1972          */
1973         for (i = 0; i < MAX_I2OB; i++)
1974                 register_disk(&i2ob_gendisk, MKDEV(MAJOR_NR,i<<4), 1<<4,
1975                         &i2ob_fops, 0);
1976         i2ob_probe();
1977
1978         /*
1979          *      Adding i2ob_gendisk into the gendisk list.
1980          */     
1981         i2ob_gendisk.next = gendisk_head;
1982         gendisk_head = &i2ob_gendisk;
1983
1984         return 0;
1985 }
1986
1987 #ifdef MODULE
1988
1989 EXPORT_NO_SYMBOLS;
1990 MODULE_AUTHOR("Red Hat Software");
1991 MODULE_DESCRIPTION("I2O Block Device OSM");
1992
1993 void cleanup_module(void)
1994 {
1995         struct gendisk *gdp;
1996         int i;
1997         
1998         if(evt_running) {
1999                 printk(KERN_INFO "Killing I2O block threads...");
2000                 i = kill_proc(evt_pid, SIGTERM, 1);
2001                 if(!i) {
2002                         printk("waiting...");
2003                 }
2004                 /* Be sure it died */
2005                 down(&i2ob_thread_dead);
2006                 printk("done.\n");
2007         }
2008
2009         /*
2010          * Unregister for updates from any devices; otherwise we still
2011          * get them and the core jumps to random memory :O
2012          */
2013         if(i2ob_dev_count) {
2014                 struct i2o_device *d;
2015                 for(i = 0; i < MAX_I2OB; i++)
2016                         if((d=i2ob_dev[i<<4].i2odev)) {
2017                                 i2o_device_notify_off(d, &i2o_block_handler);
2018                                 i2o_event_register(d->controller, d->lct_data.tid, 
2019                                         i2ob_context, i<<4, 0);
2020                         }
2021         }
2022         
2023         /*
2024          *      We may get further callbacks for ourselves. The i2o_core
2025          *      code handles this case reasonably sanely. The problem here
2026          *      is that we shouldn't get them... but a couple of cards feel
2027          *      obliged to tell us stuff we don't care about.
2028          *
2029          *      This isn't ideal at all but will do for now.
2030          */
2031          
2032         set_current_state(TASK_UNINTERRUPTIBLE);
2033         schedule_timeout(HZ);
2034         
2035         /*
2036          *      Flush the OSM
2037          */
2038
2039         i2o_remove_handler(&i2o_block_handler);
2040                  
2041         /*
2042          *      Return the block device
2043          */
2044         if (unregister_blkdev(MAJOR_NR, "i2o_block") != 0)
2045                 printk(KERN_ERR "i2o_block: cleanup_module failed\n");
2046
2047         /*
2048          * free request queue
2049          */
2050         blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
2051
2052         /*
2053          *      Why isn't register/unregister gendisk in the kernel???
2054          */
2055
2056         if (gendisk_head == &i2ob_gendisk) {
2057                 gendisk_head = i2ob_gendisk.next;
2058         }
2059         else {
2060                 for (gdp = gendisk_head; gdp; gdp = gdp->next)
2061                         if (gdp->next == &i2ob_gendisk)
2062                         {
2063                                 gdp->next = i2ob_gendisk.next;
2064                                 break;
2065                         }
2066         }
2067 }
2068 #endif