- Update Xen patches to c/s 1011.
1 Subject: xen3 xen-drivers
2 From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 1011:11175e60d393)
3 Patch-mainline: obsolete
4 Acked-by: jbeulich@novell.com
5
6 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
7 +++ sle11sp1-2010-03-29/drivers/xen/balloon/Makefile    2007-06-12 13:13:44.000000000 +0200
8 @@ -0,0 +1,2 @@
9 +
10 +obj-y := balloon.o sysfs.o
11 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
12 +++ sle11sp1-2010-03-29/drivers/xen/balloon/balloon.c   2010-03-31 00:00:00.000000000 +0200
13 @@ -0,0 +1,757 @@
14 +/******************************************************************************
15 + * balloon.c
16 + *
17 + * Xen balloon driver - enables returning/claiming memory to/from Xen.
18 + *
19 + * Copyright (c) 2003, B Dragovic
20 + * Copyright (c) 2003-2004, M Williamson, K Fraser
21 + * Copyright (c) 2005 Dan M. Smith, IBM Corporation
22 + * 
23 + * This program is free software; you can redistribute it and/or
24 + * modify it under the terms of the GNU General Public License version 2
25 + * as published by the Free Software Foundation; or, when distributed
26 + * separately from the Linux kernel or incorporated into other
27 + * software packages, subject to the following license:
28 + * 
29 + * Permission is hereby granted, free of charge, to any person obtaining a copy
30 + * of this source file (the "Software"), to deal in the Software without
31 + * restriction, including without limitation the rights to use, copy, modify,
32 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
33 + * and to permit persons to whom the Software is furnished to do so, subject to
34 + * the following conditions:
35 + * 
36 + * The above copyright notice and this permission notice shall be included in
37 + * all copies or substantial portions of the Software.
38 + * 
39 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
40 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
41 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
42 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
43 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
44 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
45 + * IN THE SOFTWARE.
46 + */
47 +
48 +#include <linux/kernel.h>
49 +#include <linux/module.h>
50 +#include <linux/sched.h>
51 +#include <linux/errno.h>
52 +#include <linux/mm.h>
53 +#include <linux/mman.h>
54 +#include <linux/smp_lock.h>
55 +#include <linux/pagemap.h>
56 +#include <linux/bootmem.h>
57 +#include <linux/highmem.h>
58 +#include <linux/vmalloc.h>
59 +#include <linux/mutex.h>
60 +#include <xen/xen_proc.h>
61 +#include <asm/hypervisor.h>
62 +#include <xen/balloon.h>
63 +#include <xen/interface/memory.h>
64 +#include <asm/maddr.h>
65 +#include <asm/page.h>
66 +#include <asm/pgalloc.h>
67 +#include <asm/pgtable.h>
68 +#include <asm/uaccess.h>
69 +#include <asm/tlb.h>
71 +#include <linux/list.h>
72 +#include <xen/xenbus.h>
73 +#include "common.h"
74 +
75 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
76 +#include <xen/platform-compat.h>
77 +#endif
78 +
79 +#ifdef CONFIG_PROC_FS
80 +static struct proc_dir_entry *balloon_pde;
81 +#endif
82 +
83 +static DEFINE_MUTEX(balloon_mutex);
84 +
85 +/*
86 + * Protects atomic reservation decrease/increase against concurrent increases.
87 + * Also protects non-atomic updates of current_pages and driver_pages, and
88 + * balloon lists.
89 + */
90 +DEFINE_SPINLOCK(balloon_lock);
91 +
92 +struct balloon_stats balloon_stats;
93 +
94 +/* We increase/decrease in batches which fit in a page */
95 +static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
96 +
97 +/* VM /proc information for memory */
98 +extern unsigned long totalram_pages;
99 +
100 +#ifndef MODULE
101 +extern unsigned long totalhigh_pages;
102 +#define inc_totalhigh_pages() (totalhigh_pages++)
103 +#define dec_totalhigh_pages() (totalhigh_pages--)
104 +#else
105 +#define inc_totalhigh_pages() ((void)0)
106 +#define dec_totalhigh_pages() ((void)0)
107 +#endif
108 +
109 +#ifndef CONFIG_XEN
110 +/*
111 + * In HVM guests the accounting here uses the Xen-visible values, but the
112 + * kernel-determined totalram_pages value shouldn't get altered. Since that
113 + * value includes neither the kernel static image nor any memory allocated
114 + * prior to or from the bootmem allocator, the two have to be synchronized.
115 + */
116 +static unsigned long __read_mostly totalram_bias;
117 +#else
118 +#define totalram_bias 0
119 +#endif
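
To make the bias concrete, suppose (toy numbers, 4 KiB pages assumed) Xen reports a 512 MiB reservation while the kernel's totalram_pages only covers 480 MiB; balloon_init() below measures the difference once, and every later reservation change maintains totalram_pages = current_pages - totalram_bias:

    #include <assert.h>

    int main(void)
    {
            unsigned long xen_pages = 131072;  /* 512 MiB as seen by Xen */
            unsigned long totalram  = 122880;  /* 480 MiB per the kernel */
            unsigned long bias      = xen_pages - totalram;

            /* The invariant kept by increase/decrease_reservation(). */
            assert(xen_pages - bias == totalram);
            return 0;
    }
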
120 +
121 +/* List of ballooned pages, threaded through the mem_map array. */
122 +static LIST_HEAD(ballooned_pages);
123 +
124 +/* Main work function, always executed in process context. */
125 +static void balloon_process(void *unused);
126 +static DECLARE_WORK(balloon_worker, balloon_process, NULL);
127 +static struct timer_list balloon_timer;
128 +
129 +/* When ballooning out (allocating memory to return to Xen), don't let the
130 +   kernel try too hard: no retries, no reserves, no waking the OOM killer. */
131 +#define GFP_BALLOON \
132 +       (GFP_HIGHUSER|__GFP_NOWARN|__GFP_NORETRY|__GFP_NOMEMALLOC|__GFP_COLD)
133 +
134 +#define PAGE_TO_LIST(p) (&(p)->lru)
135 +#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
136 +#define UNLIST_PAGE(p)                         \
137 +       do {                                    \
138 +               list_del(PAGE_TO_LIST(p));      \
139 +               PAGE_TO_LIST(p)->next = NULL;   \
140 +               PAGE_TO_LIST(p)->prev = NULL;   \
141 +       } while(0)
142 +
143 +#define IPRINTK(fmt, args...) \
144 +       printk(KERN_INFO "xen_mem: " fmt, ##args)
145 +#define WPRINTK(fmt, args...) \
146 +       printk(KERN_WARNING "xen_mem: " fmt, ##args)
147 +
148 +/* balloon_append: add the given page to the balloon. */
149 +static void balloon_append(struct page *page)
150 +{
151 +       /* Lowmem is re-populated first, so highmem pages go at list tail. */
152 +       if (PageHighMem(page)) {
153 +               list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
154 +               bs.balloon_high++;
155 +               dec_totalhigh_pages();
156 +       } else {
157 +               list_add(PAGE_TO_LIST(page), &ballooned_pages);
158 +               bs.balloon_low++;
159 +       }
160 +}
161 +
162 +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
163 +static struct page *balloon_retrieve(void)
164 +{
165 +       struct page *page;
166 +
167 +       if (list_empty(&ballooned_pages))
168 +               return NULL;
169 +
170 +       page = LIST_TO_PAGE(ballooned_pages.next);
171 +       UNLIST_PAGE(page);
172 +
173 +       if (PageHighMem(page)) {
174 +               bs.balloon_high--;
175 +               inc_totalhigh_pages();
176 +       }
177 +       else
178 +               bs.balloon_low--;
179 +
180 +       return page;
181 +}
182 +
183 +static struct page *balloon_first_page(void)
184 +{
185 +       if (list_empty(&ballooned_pages))
186 +               return NULL;
187 +       return LIST_TO_PAGE(ballooned_pages.next);
188 +}
189 +
190 +static struct page *balloon_next_page(struct page *page)
191 +{
192 +       struct list_head *next = PAGE_TO_LIST(page)->next;
193 +       if (next == &ballooned_pages)
194 +               return NULL;
195 +       return LIST_TO_PAGE(next);
196 +}
197 +
198 +static inline void balloon_free_page(struct page *page)
199 +{
200 +#ifndef MODULE
201 +       if (put_page_testzero(page))
202 +               free_cold_page(page);
203 +#else
204 +       /* free_cold_page() is not exported to modules; fall back. */
205 +       __free_page(page);
206 +#endif
207 +}
208 +
209 +static void balloon_alarm(unsigned long unused)
210 +{
211 +       schedule_work(&balloon_worker);
212 +}
213 +
214 +static unsigned long current_target(void)
215 +{
216 +       unsigned long target = bs.target_pages;
217 +       if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
218 +               target = bs.current_pages + bs.balloon_low + bs.balloon_high;
219 +       return target;
220 +}
221 +
222 +static unsigned long minimum_target(void)
223 +{
224 +#ifndef CONFIG_XEN
225 +#define max_pfn num_physpages
226 +#endif
227 +       unsigned long min_pages, curr_pages = current_target();
228 +
229 +#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
230 +       /* Simple continuous piecewise linear function:
231 +        *  max MiB -> min MiB  gradient
232 +        *       0         0
233 +        *      16        16
234 +        *      32        24
235 +        *     128        72    (1/2)
236 +        *     512       168    (1/4)
237 +        *    2048       360    (1/8)
238 +        *    8192       552    (1/32)
239 +        *   32768      1320
240 +        *  131072      4392
241 +        */
242 +       if (max_pfn < MB2PAGES(128))
243 +               min_pages = MB2PAGES(8) + (max_pfn >> 1);
244 +       else if (max_pfn < MB2PAGES(512))
245 +               min_pages = MB2PAGES(40) + (max_pfn >> 2);
246 +       else if (max_pfn < MB2PAGES(2048))
247 +               min_pages = MB2PAGES(104) + (max_pfn >> 3);
248 +       else
249 +               min_pages = MB2PAGES(296) + (max_pfn >> 5);
250 +#undef MB2PAGES
251 +
252 +       /* Don't enforce growth */
253 +       return min(min_pages, curr_pages);
254 +#ifndef CONFIG_XEN
255 +#undef max_pfn
256 +#endif
257 +}
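
To check the table above, here is a minimal userspace sketch of the same curve (assuming PAGE_SHIFT == 12, i.e. 4 KiB pages); a 1024 MiB guest, for example, gets a floor of 104 MiB + 1024/8 MiB = 232 MiB, consistent with the 512 -> 168 and 2048 -> 360 rows:

    #include <assert.h>

    #define PAGE_SHIFT 12
    #define MB2PAGES(mb) ((unsigned long)(mb) << (20 - PAGE_SHIFT))

    /* Same piecewise-linear curve as minimum_target() above. */
    static unsigned long min_pages_for(unsigned long max_pfn)
    {
            if (max_pfn < MB2PAGES(128))
                    return MB2PAGES(8) + (max_pfn >> 1);
            if (max_pfn < MB2PAGES(512))
                    return MB2PAGES(40) + (max_pfn >> 2);
            if (max_pfn < MB2PAGES(2048))
                    return MB2PAGES(104) + (max_pfn >> 3);
            return MB2PAGES(296) + (max_pfn >> 5);
    }

    int main(void)
    {
            assert(min_pages_for(MB2PAGES(1024)) == MB2PAGES(232));
            return 0;
    }
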
258 +
259 +static int increase_reservation(unsigned long nr_pages)
260 +{
261 +       unsigned long  pfn, i, flags;
262 +       struct page   *page;
263 +       long           rc;
264 +       struct xen_memory_reservation reservation = {
265 +               .address_bits = 0,
266 +               .extent_order = 0,
267 +               .domid        = DOMID_SELF
268 +       };
269 +
270 +       if (nr_pages > ARRAY_SIZE(frame_list))
271 +               nr_pages = ARRAY_SIZE(frame_list);
272 +
273 +       balloon_lock(flags);
274 +
275 +       page = balloon_first_page();
276 +       for (i = 0; i < nr_pages; i++) {
277 +               BUG_ON(page == NULL);
278 +               frame_list[i] = page_to_pfn(page);
279 +               page = balloon_next_page(page);
280 +       }
281 +
282 +       set_xen_guest_handle(reservation.extent_start, frame_list);
283 +       reservation.nr_extents = nr_pages;
284 +       rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
285 +       if (rc < 0)
286 +               goto out;
287 +
288 +       for (i = 0; i < rc; i++) {
289 +               page = balloon_retrieve();
290 +               BUG_ON(page == NULL);
291 +
292 +               pfn = page_to_pfn(page);
293 +               BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
294 +                      phys_to_machine_mapping_valid(pfn));
295 +
296 +               set_phys_to_machine(pfn, frame_list[i]);
297 +
298 +#ifdef CONFIG_XEN
299 +               /* Link back into the page tables if not highmem. */
300 +               if (pfn < max_low_pfn) {
301 +                       int ret;
302 +                       ret = HYPERVISOR_update_va_mapping(
303 +                               (unsigned long)__va(pfn << PAGE_SHIFT),
304 +                               pfn_pte_ma(frame_list[i], PAGE_KERNEL),
305 +                               0);
306 +                       BUG_ON(ret);
307 +               }
308 +#endif
309 +
310 +               /* Relinquish the page back to the allocator. */
311 +               ClearPageReserved(page);
312 +               init_page_count(page);
313 +               balloon_free_page(page);
314 +       }
315 +
316 +       bs.current_pages += rc;
317 +       totalram_pages = bs.current_pages - totalram_bias;
318 +
319 + out:
320 +       balloon_unlock(flags);
321 +
322 +       return rc < 0 ? rc : rc != nr_pages;
323 +}
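
The return expression collapses the hypercall result into three cases the worker can act on: a negative errno, 0 for fully satisfied, or 1 for a partial success that warrants a retry. A small sketch of the idiom:

    #include <assert.h>

    /* Mirrors "rc < 0 ? rc : rc != nr_pages" in increase_reservation(). */
    static long collapse(long rc, unsigned long nr_pages)
    {
            return rc < 0 ? rc : rc != nr_pages;
    }

    int main(void)
    {
            assert(collapse(-12, 64) == -12);  /* hypercall failed (-ENOMEM) */
            assert(collapse(64, 64) == 0);     /* all extents populated */
            assert(collapse(32, 64) == 1);     /* partial: try again later */
            return 0;
    }
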
324 +
325 +static int decrease_reservation(unsigned long nr_pages)
326 +{
327 +       unsigned long  pfn, i, flags;
328 +       struct page   *page;
329 +       void          *v;
330 +       int            need_sleep = 0;
331 +       int ret;
332 +       struct xen_memory_reservation reservation = {
333 +               .address_bits = 0,
334 +               .extent_order = 0,
335 +               .domid        = DOMID_SELF
336 +       };
337 +
338 +       if (nr_pages > ARRAY_SIZE(frame_list))
339 +               nr_pages = ARRAY_SIZE(frame_list);
340 +
341 +       for (i = 0; i < nr_pages; i++) {
342 +               if ((page = alloc_page(GFP_BALLOON)) == NULL) {
343 +                       nr_pages = i;
344 +                       need_sleep = 1;
345 +                       break;
346 +               }
347 +
348 +               pfn = page_to_pfn(page);
349 +               frame_list[i] = pfn_to_mfn(pfn);
350 +
351 +               if (!PageHighMem(page)) {
352 +                       v = phys_to_virt(pfn << PAGE_SHIFT);
353 +                       scrub_pages(v, 1);
354 +#ifdef CONFIG_XEN
355 +                       ret = HYPERVISOR_update_va_mapping(
356 +                               (unsigned long)v, __pte_ma(0), 0);
357 +                       BUG_ON(ret);
358 +#endif
359 +               }
360 +#ifdef CONFIG_XEN_SCRUB_PAGES
361 +               else {
362 +                       v = kmap(page);
363 +                       scrub_pages(v, 1);
364 +                       kunmap(page);
365 +               }
366 +#endif
367 +       }
368 +
369 +#ifdef CONFIG_XEN
370 +       /* Ensure that ballooned highmem pages don't have kmaps. */
371 +       kmap_flush_unused();
372 +       flush_tlb_all();
373 +#endif
374 +
375 +       balloon_lock(flags);
376 +
377 +       /* No more mappings: invalidate P2M and add to balloon. */
378 +       for (i = 0; i < nr_pages; i++) {
379 +               pfn = mfn_to_pfn(frame_list[i]);
380 +               set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
381 +               balloon_append(pfn_to_page(pfn));
382 +       }
383 +
384 +       set_xen_guest_handle(reservation.extent_start, frame_list);
385 +       reservation.nr_extents   = nr_pages;
386 +       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
387 +       BUG_ON(ret != nr_pages);
388 +
389 +       bs.current_pages -= nr_pages;
390 +       totalram_pages = bs.current_pages - totalram_bias;
391 +
392 +       balloon_unlock(flags);
393 +
394 +       return need_sleep;
395 +}
396 +
397 +/*
398 + * We avoid multiple worker processes conflicting via the balloon mutex.
399 + * We may of course race updates of the target counts (which are protected
400 + * by the balloon lock), or with changes to the Xen hard limit, but we will
401 + * recover from these in time.
402 + */
403 +static void balloon_process(void *unused)
404 +{
405 +       int need_sleep = 0;
406 +       long credit;
407 +
408 +       mutex_lock(&balloon_mutex);
409 +
410 +       do {
411 +               credit = current_target() - bs.current_pages;
412 +               if (credit > 0)
413 +                       need_sleep = (increase_reservation(credit) != 0);
414 +               if (credit < 0)
415 +                       need_sleep = (decrease_reservation(-credit) != 0);
416 +
417 +#ifndef CONFIG_PREEMPT
418 +               if (need_resched())
419 +                       schedule();
420 +#endif
421 +       } while ((credit != 0) && !need_sleep);
422 +
423 +       /* Schedule more work if there is some still to be done. */
424 +       if (current_target() != bs.current_pages)
425 +               mod_timer(&balloon_timer, jiffies + HZ);
426 +
427 +       mutex_unlock(&balloon_mutex);
428 +}
429 +
430 +/* Resets the Xen limit, sets new target, and kicks off processing. */
431 +void balloon_set_new_target(unsigned long target)
432 +{
433 +       /* No need for a lock: these are not read-modify-write updates. */
434 +       bs.target_pages = max(target, minimum_target());
435 +       schedule_work(&balloon_worker);
436 +}
437 +
438 +static struct xenbus_watch target_watch =
439 +{
440 +       .node = "memory/target"
441 +};
442 +
443 +/* React to a change in the target key */
444 +static void watch_target(struct xenbus_watch *watch,
445 +                        const char **vec, unsigned int len)
446 +{
447 +       unsigned long long new_target;
448 +       int err;
449 +
450 +       err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
451 +       if (err != 1) {
452 +               /* This is ok (for domain0 at least) - so just return */
453 +               return;
454 +       }
455 +
456 +       /* The given memory/target value is in KiB, so it needs converting to
457 +        * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
458 +        */
459 +       balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
460 +}
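
The PAGE_SHIFT - 10 shift works because a KiB count already has 10 of the PAGE_SHIFT bits divided out. For instance, with 4 KiB pages (an assumption; PAGE_SHIFT == 12), a 1 GiB target written as 1048576 KiB becomes 262144 pages:

    #include <assert.h>

    #define PAGE_SHIFT 12  /* assumed 4 KiB pages */

    int main(void)
    {
            unsigned long long kib = 1048576ULL;           /* 1 GiB in KiB */
            assert((kib >> (PAGE_SHIFT - 10)) == 262144);  /* in pages */
            return 0;
    }
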
461 +
462 +static int balloon_init_watcher(struct notifier_block *notifier,
463 +                               unsigned long event,
464 +                               void *data)
465 +{
466 +       int err;
467 +
468 +       err = register_xenbus_watch(&target_watch);
469 +       if (err)
470 +               printk(KERN_ERR "Failed to set balloon watcher\n");
471 +
472 +       return NOTIFY_DONE;
473 +}
474 +
475 +#ifdef CONFIG_PROC_FS
476 +static int balloon_write(struct file *file, const char __user *buffer,
477 +                        unsigned long count, void *data)
478 +{
479 +       char memstring[64], *endchar;
480 +       unsigned long long target_bytes;
481 +
482 +       if (!capable(CAP_SYS_ADMIN))
483 +               return -EPERM;
484 +
485 +       if (count <= 1)
486 +               return -EBADMSG; /* runt */
487 +       if (count >= sizeof(memstring))
488 +               return -EFBIG;   /* too long */
489 +
490 +       if (copy_from_user(memstring, buffer, count))
491 +               return -EFAULT;
492 +       memstring[count] = '\0';
493 +
494 +       target_bytes = memparse(memstring, &endchar);
495 +       balloon_set_new_target(target_bytes >> PAGE_SHIFT);
496 +
497 +       return count;
498 +}
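
Because the value goes through memparse(), the usual size suffixes are accepted, e.g. echo 512M > /proc/xen/balloon from a root shell. A rough userspace model of the parse-and-convert step (the real memparse() lives in lib/cmdline.c and takes a number with an optional K/M/G suffix):

    #include <assert.h>
    #include <stdlib.h>

    static unsigned long long parse_mem(const char *s)
    {
            char *end;
            unsigned long long v = strtoull(s, &end, 0);

            switch (*end) {
            case 'G': case 'g': v <<= 10; /* fall through */
            case 'M': case 'm': v <<= 10; /* fall through */
            case 'K': case 'k': v <<= 10;
            }
            return v;
    }

    int main(void)
    {
            /* "512M" -> bytes -> pages, as balloon_write() computes it. */
            assert((parse_mem("512M") >> 12) == 131072);
            return 0;
    }
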
499 +
500 +static int balloon_read(char *page, char **start, off_t off,
501 +                       int count, int *eof, void *data)
502 +{
503 +       int len;
504 +
505 +       len = sprintf(
506 +               page,
507 +               "Current allocation: %8lu kB\n"
508 +               "Requested target:   %8lu kB\n"
509 +               "Low-mem balloon:    %8lu kB\n"
510 +               "High-mem balloon:   %8lu kB\n"
511 +               "Driver pages:       %8lu kB\n",
512 +               PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages), 
513 +               PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high),
514 +               PAGES2KB(bs.driver_pages));
515 +
517 +       *eof = 1;
518 +       return len;
519 +}
520 +#endif
521 +
522 +static struct notifier_block xenstore_notifier;
523 +
524 +static int __init balloon_init(void)
525 +{
526 +#if !defined(CONFIG_XEN)
527 +# ifndef XENMEM_get_pod_target
528 +#  define XENMEM_get_pod_target 17
529 +       typedef struct xen_pod_target {
530 +               uint64_t target_pages;
531 +               uint64_t tot_pages;
532 +               uint64_t pod_cache_pages;
533 +               uint64_t pod_entries;
534 +               domid_t domid;
535 +       } xen_pod_target_t;
536 +# endif
537 +       xen_pod_target_t pod_target = { .domid = DOMID_SELF };
538 +       int rc;
539 +#elif defined(CONFIG_X86)
540 +       unsigned long pfn;
541 +       struct page *page;
542 +#endif
543 +
544 +       if (!is_running_on_xen())
545 +               return -ENODEV;
546 +
547 +       IPRINTK("Initialising balloon driver.\n");
548 +
549 +#ifdef CONFIG_XEN
550 +       bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
551 +       totalram_pages   = bs.current_pages;
552 +#else 
553 +       rc = HYPERVISOR_memory_op(XENMEM_get_pod_target, &pod_target);
554 +       /*
555 +        * Xen prior to 3.4.0 masks the memory_op command to 4 bits, thus
556 +        * converting XENMEM_get_pod_target to XENMEM_decrease_reservation.
557 +        * Fortunately this results in a request with all input fields zero,
558 +        * but (due to the way bit 4 and upwards get interpreted) a starting
559 +        * extent of 1. When start_extent > nr_extents (>= in newer Xen), we
560 +        * simply get start_extent returned.
561 +        */
562 +       totalram_bias = HYPERVISOR_memory_op(rc != -ENOSYS && rc != 1
563 +               ? XENMEM_maximum_reservation : XENMEM_current_reservation,
564 +               &pod_target.domid);
565 +       if ((long)totalram_bias != -ENOSYS) {
566 +               BUG_ON(totalram_bias < totalram_pages);
567 +               bs.current_pages = totalram_bias;
568 +               totalram_bias -= totalram_pages;
569 +       } else {
570 +               totalram_bias = 0;
571 +               bs.current_pages = totalram_pages;
572 +       }
573 +#endif
574 +       bs.target_pages  = bs.current_pages;
575 +       bs.balloon_low   = 0;
576 +       bs.balloon_high  = 0;
577 +       bs.driver_pages  = 0UL;
578 +
579 +       init_timer(&balloon_timer);
580 +       balloon_timer.data = 0;
581 +       balloon_timer.function = balloon_alarm;
582 +    
583 +#ifdef CONFIG_PROC_FS
584 +       if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
585 +               WPRINTK("Unable to create /proc/xen/balloon.\n");
586 +               return -1;
587 +       }
588 +
589 +       balloon_pde->read_proc  = balloon_read;
590 +       balloon_pde->write_proc = balloon_write;
591 +#endif
592 +       balloon_sysfs_init();
593 +
594 +#if defined(CONFIG_X86) && defined(CONFIG_XEN) 
595 +       /* Initialise the balloon with excess memory space. */
596 +       for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
597 +               page = pfn_to_page(pfn);
598 +               if (!PageReserved(page))
599 +                       balloon_append(page);
600 +       }
601 +#endif
602 +
603 +       target_watch.callback = watch_target;
604 +       xenstore_notifier.notifier_call = balloon_init_watcher;
605 +
606 +       register_xenstore_notifier(&xenstore_notifier);
607 +    
608 +       return 0;
609 +}
610 +
611 +subsys_initcall(balloon_init);
612 +
613 +static void __exit balloon_exit(void)
614 +{
615 +       balloon_sysfs_exit();
616 +       /* XXX - release balloon here */
617 +}
618 +
619 +module_exit(balloon_exit); 
620 +
621 +void balloon_update_driver_allowance(long delta)
622 +{
623 +       unsigned long flags;
624 +
625 +       balloon_lock(flags);
626 +       bs.driver_pages += delta;
627 +       balloon_unlock(flags);
628 +}
629 +
630 +#ifdef CONFIG_XEN
631 +static int dealloc_pte_fn(
632 +       pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
633 +{
634 +       unsigned long mfn = pte_mfn(*pte);
635 +       int ret;
636 +       struct xen_memory_reservation reservation = {
637 +               .nr_extents   = 1,
638 +               .extent_order = 0,
639 +               .domid        = DOMID_SELF
640 +       };
641 +       set_xen_guest_handle(reservation.extent_start, &mfn);
642 +       set_pte_at(&init_mm, addr, pte, __pte_ma(0));
643 +       set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
644 +       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
645 +       BUG_ON(ret != 1);
646 +       return 0;
647 +}
648 +#endif
649 +
650 +struct page **alloc_empty_pages_and_pagevec(int nr_pages)
651 +{
652 +       unsigned long flags;
653 +       void *v;
654 +       struct page *page, **pagevec;
655 +       int i, ret;
656 +
657 +       pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
658 +       if (pagevec == NULL)
659 +               return NULL;
660 +
661 +       for (i = 0; i < nr_pages; i++) {
662 +               balloon_lock(flags);
663 +               page = balloon_first_page();
664 +               if (page && !PageHighMem(page)) {
665 +                       UNLIST_PAGE(page);
666 +                       bs.balloon_low--;
667 +                       balloon_unlock(flags);
668 +                       pagevec[i] = page;
669 +                       continue;
670 +               }
671 +               balloon_unlock(flags);
672 +
673 +               page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD);
674 +               if (page == NULL)
675 +                       goto err;
676 +
677 +               v = page_address(page);
678 +               scrub_pages(v, 1);
679 +
680 +               balloon_lock(flags);
681 +
682 +               if (xen_feature(XENFEAT_auto_translated_physmap)) {
683 +                       unsigned long gmfn = page_to_pfn(page);
684 +                       struct xen_memory_reservation reservation = {
685 +                               .nr_extents   = 1,
686 +                               .extent_order = 0,
687 +                               .domid        = DOMID_SELF
688 +                       };
689 +                       set_xen_guest_handle(reservation.extent_start, &gmfn);
690 +                       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
691 +                                                  &reservation);
692 +                       if (ret == 1)
693 +                               ret = 0; /* success */
694 +               } else {
695 +#ifdef CONFIG_XEN
696 +                       ret = apply_to_page_range(&init_mm, (unsigned long)v,
697 +                                                 PAGE_SIZE, dealloc_pte_fn,
698 +                                                 NULL);
699 +#else
700 +                       /* Cannot handle non-auto translate mode. */
701 +                       ret = 1;
702 +#endif
703 +               }
704 +
705 +               if (ret != 0) {
706 +                       balloon_unlock(flags);
707 +                       balloon_free_page(page);
708 +                       goto err;
709 +               }
710 +
711 +               totalram_pages = --bs.current_pages - totalram_bias;
712 +
713 +               balloon_unlock(flags);
714 +       }
715 +
716 + out:
717 +       schedule_work(&balloon_worker);
718 +#ifdef CONFIG_XEN
719 +       flush_tlb_all();
720 +#endif
721 +       return pagevec;
722 +
723 + err:
724 +       balloon_lock(flags);
725 +       while (--i >= 0)
726 +               balloon_append(pagevec[i]);
727 +       balloon_unlock(flags);
728 +       kfree(pagevec);
729 +       pagevec = NULL;
730 +       goto out;
731 +}
732 +
733 +void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
734 +{
735 +       unsigned long flags;
736 +       int i;
737 +
738 +       if (pagevec == NULL)
739 +               return;
740 +
741 +       balloon_lock(flags);
742 +       for (i = 0; i < nr_pages; i++) {
743 +               BUG_ON(page_count(pagevec[i]) != 1);
744 +               balloon_append(pagevec[i]);
745 +       }
746 +       balloon_unlock(flags);
747 +
748 +       kfree(pagevec);
749 +
750 +       schedule_work(&balloon_worker);
751 +}
752 +
753 +void balloon_release_driver_page(struct page *page)
754 +{
755 +       unsigned long flags;
756 +
757 +       balloon_lock(flags);
758 +       balloon_append(page);
759 +       bs.driver_pages--;
760 +       balloon_unlock(flags);
761 +
762 +       schedule_work(&balloon_worker);
763 +}
764 +
765 +EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
766 +EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
767 +EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
768 +EXPORT_SYMBOL_GPL(balloon_release_driver_page);
769 +
770 +MODULE_LICENSE("Dual BSD/GPL");
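
For context, the four exports above are the interface other Xen backend drivers consume. A hypothetical consumer (function and variable names invented for illustration; a sketch of the pairing, not code from the tree) would look roughly like:

    /* Kernel-context sketch, not standalone: uses the exports above. */
    static struct page **grant_pages;

    static int example_backend_start(void)
    {
            /* Take 16 empty page frames to use as grant-mapping targets. */
            grant_pages = alloc_empty_pages_and_pagevec(16);
            if (!grant_pages)
                    return -ENOMEM;
            /* Tell the balloon those pages are now driver-held. */
            balloon_update_driver_allowance(16);
            return 0;
    }

    static void example_backend_stop(void)
    {
            balloon_update_driver_allowance(-16);
            free_empty_pages_and_pagevec(grant_pages, 16);
    }
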
771 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
772 +++ sle11sp1-2010-03-29/drivers/xen/balloon/common.h    2009-06-09 15:01:37.000000000 +0200
773 @@ -0,0 +1,56 @@
774 +/******************************************************************************
775 + * balloon/common.h
776 + *
777 + * This program is free software; you can redistribute it and/or
778 + * modify it under the terms of the GNU General Public License version 2
779 + * as published by the Free Software Foundation; or, when distributed
780 + * separately from the Linux kernel or incorporated into other
781 + * software packages, subject to the following license:
782 + * 
783 + * Permission is hereby granted, free of charge, to any person obtaining a copy
784 + * of this source file (the "Software"), to deal in the Software without
785 + * restriction, including without limitation the rights to use, copy, modify,
786 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
787 + * and to permit persons to whom the Software is furnished to do so, subject to
788 + * the following conditions:
789 + * 
790 + * The above copyright notice and this permission notice shall be included in
791 + * all copies or substantial portions of the Software.
792 + * 
793 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
794 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
795 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
796 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
797 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
798 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
799 + * IN THE SOFTWARE.
800 + */
801 +
802 +#ifndef __XEN_BALLOON_COMMON_H__
803 +#define __XEN_BALLOON_COMMON_H__
804 +
805 +#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
806 +
807 +struct balloon_stats {
808 +       /* We aim for 'current allocation' == 'target allocation'. */
809 +       unsigned long current_pages;
810 +       unsigned long target_pages;
811 +       /*
812 +        * Drivers may alter the memory reservation independently, but they
813 +        * must inform the balloon driver so we avoid hitting the hard limit.
814 +        */
815 +       unsigned long driver_pages;
816 +       /* Number of pages in high- and low-memory balloons. */
817 +       unsigned long balloon_low;
818 +       unsigned long balloon_high;
819 +};
820 +
821 +extern struct balloon_stats balloon_stats;
822 +#define bs balloon_stats
823 +
824 +int balloon_sysfs_init(void);
825 +void balloon_sysfs_exit(void);
826 +
827 +void balloon_set_new_target(unsigned long target);
828 +
829 +#endif /* __XEN_BALLOON_COMMON_H__ */
830 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
831 +++ sle11sp1-2010-03-29/drivers/xen/balloon/sysfs.c     2009-06-09 15:01:37.000000000 +0200
832 @@ -0,0 +1,167 @@
833 +/******************************************************************************
834 + * balloon/sysfs.c
835 + *
836 + * Xen balloon driver - sysfs interfaces.
837 + * 
838 + * This program is free software; you can redistribute it and/or
839 + * modify it under the terms of the GNU General Public License version 2
840 + * as published by the Free Software Foundation; or, when distributed
841 + * separately from the Linux kernel or incorporated into other
842 + * software packages, subject to the following license:
843 + * 
844 + * Permission is hereby granted, free of charge, to any person obtaining a copy
845 + * of this source file (the "Software"), to deal in the Software without
846 + * restriction, including without limitation the rights to use, copy, modify,
847 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
848 + * and to permit persons to whom the Software is furnished to do so, subject to
849 + * the following conditions:
850 + * 
851 + * The above copyright notice and this permission notice shall be included in
852 + * all copies or substantial portions of the Software.
853 + * 
854 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
855 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
856 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
857 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
858 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
859 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
860 + * IN THE SOFTWARE.
861 + */
862 +
863 +#include <linux/capability.h>
864 +#include <linux/errno.h>
865 +#include <linux/init.h>
866 +#include <linux/stat.h>
867 +#include <linux/string.h>
868 +#include <linux/sysdev.h>
869 +#include "common.h"
870 +
871 +#ifdef HAVE_XEN_PLATFORM_COMPAT_H
872 +#include <xen/platform-compat.h>
873 +#endif
874 +
875 +#define BALLOON_CLASS_NAME "xen_memory"
876 +
877 +#define BALLOON_SHOW(name, format, args...)                    \
878 +       static ssize_t show_##name(struct sys_device *dev,      \
879 +                                  char *buf)                   \
880 +       {                                                       \
881 +               return sprintf(buf, format, ##args);            \
882 +       }                                                       \
883 +       static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
884 +
885 +BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(bs.current_pages));
886 +BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(bs.balloon_low));
887 +BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high));
888 +BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages));
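
For reference, each BALLOON_SHOW() use expands to a show routine plus a read-only sysdev attribute; written out, the current_kb instance is:

    static ssize_t show_current_kb(struct sys_device *dev, char *buf)
    {
            return sprintf(buf, "%lu\n", PAGES2KB(bs.current_pages));
    }
    static SYSDEV_ATTR(current_kb, S_IRUGO, show_current_kb, NULL);
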
889 +
890 +static ssize_t show_target_kb(struct sys_device *dev, char *buf)
891 +{
892 +       return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages));
893 +}
894 +
895 +static ssize_t store_target_kb(struct sys_device *dev,
896 +                              const char *buf,
897 +                              size_t count)
898 +{
899 +       char memstring[64], *endchar;
900 +       unsigned long long target_bytes;
901 +
902 +       if (!capable(CAP_SYS_ADMIN))
903 +               return -EPERM;
904 +       
905 +       if (count <= 1)
906 +               return -EBADMSG; /* runt */
907 +       if (count >= sizeof(memstring))
908 +               return -EFBIG;   /* too long */
909 +       strcpy(memstring, buf);
910 +       
911 +       target_bytes = memparse(memstring, &endchar);
912 +       balloon_set_new_target(target_bytes >> PAGE_SHIFT);
913 +       
914 +       return count;
915 +}
916 +
917 +static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
918 +                  show_target_kb, store_target_kb);
919 +
920 +static struct sysdev_attribute *balloon_attrs[] = {
921 +       &attr_target_kb,
922 +};
923 +
924 +static struct attribute *balloon_info_attrs[] = {
925 +       &attr_current_kb.attr,
926 +       &attr_low_kb.attr,
927 +       &attr_high_kb.attr,
928 +       &attr_driver_kb.attr,
929 +       NULL
930 +};
931 +
932 +static struct attribute_group balloon_info_group = {
933 +       .name = "info",
934 +       .attrs = balloon_info_attrs,
935 +};
936 +
937 +static struct sysdev_class balloon_sysdev_class = {
938 +       set_kset_name(BALLOON_CLASS_NAME),
939 +};
940 +
941 +static struct sys_device balloon_sysdev;
942 +
943 +static int __init register_balloon(struct sys_device *sysdev)
944 +{
945 +       int i, error;
946 +
947 +       error = sysdev_class_register(&balloon_sysdev_class);
948 +       if (error)
949 +               return error;
950 +
951 +       sysdev->id = 0;
952 +       sysdev->cls = &balloon_sysdev_class;
953 +
954 +       error = sysdev_register(sysdev);
955 +       if (error) {
956 +               sysdev_class_unregister(&balloon_sysdev_class);
957 +               return error;
958 +       }
959 +
960 +       for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
961 +               error = sysdev_create_file(sysdev, balloon_attrs[i]);
962 +               if (error)
963 +                       goto fail;
964 +       }
965 +
966 +       error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
967 +       if (error)
968 +               goto fail;
969 +       
970 +       return 0;
971 +
972 + fail:
973 +       while (--i >= 0)
974 +               sysdev_remove_file(sysdev, balloon_attrs[i]);
975 +       sysdev_unregister(sysdev);
976 +       sysdev_class_unregister(&balloon_sysdev_class);
977 +       return error;
978 +}
979 +
980 +static __exit void unregister_balloon(struct sys_device *sysdev)
981 +{
982 +       int i;
983 +
984 +       sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
985 +       for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
986 +               sysdev_remove_file(sysdev, balloon_attrs[i]);
987 +       sysdev_unregister(sysdev);
988 +       sysdev_class_unregister(&balloon_sysdev_class);
989 +}
990 +
991 +int __init balloon_sysfs_init(void)
992 +{
993 +       return register_balloon(&balloon_sysdev);
994 +}
995 +
996 +void __exit balloon_sysfs_exit(void)
997 +{
998 +       unregister_balloon(&balloon_sysdev);
999 +}
1000 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1001 +++ sle11sp1-2010-03-29/drivers/xen/blkback/Makefile    2009-06-09 15:01:37.000000000 +0200
1002 @@ -0,0 +1,4 @@
1003 +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o
1004 +obj-$(CONFIG_XEN_BLKBACK_PAGEMAP) += blkback-pagemap.o
1005 +
1006 +blkbk-y        := blkback.o xenbus.o interface.o vbd.o
1007 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1008 +++ sle11sp1-2010-03-29/drivers/xen/blkback/blkback.c   2010-03-22 12:00:53.000000000 +0100
1009 @@ -0,0 +1,703 @@
1010 +/******************************************************************************
1011 + * arch/xen/drivers/blkif/backend/main.c
1012 + * 
1013 + * Back-end of the driver for virtual block devices. This portion of the
1014 + * driver exports a 'unified' block-device interface that can be accessed
1015 + * by any operating system that implements a compatible front end. A 
1016 + * reference front-end implementation can be found in:
1017 + *  arch/xen/drivers/blkif/frontend
1018 + * 
1019 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
1020 + * Copyright (c) 2005, Christopher Clark
1021 + * 
1022 + * This program is free software; you can redistribute it and/or
1023 + * modify it under the terms of the GNU General Public License version 2
1024 + * as published by the Free Software Foundation; or, when distributed
1025 + * separately from the Linux kernel or incorporated into other
1026 + * software packages, subject to the following license:
1027 + * 
1028 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1029 + * of this source file (the "Software"), to deal in the Software without
1030 + * restriction, including without limitation the rights to use, copy, modify,
1031 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1032 + * and to permit persons to whom the Software is furnished to do so, subject to
1033 + * the following conditions:
1034 + * 
1035 + * The above copyright notice and this permission notice shall be included in
1036 + * all copies or substantial portions of the Software.
1037 + * 
1038 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1039 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1040 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1041 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1042 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1043 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1044 + * IN THE SOFTWARE.
1045 + */
1046 +
1047 +#include <linux/spinlock.h>
1048 +#include <linux/kthread.h>
1049 +#include <linux/list.h>
1050 +#include <linux/delay.h>
1051 +#include <xen/balloon.h>
1052 +#include <asm/hypervisor.h>
1053 +#include "common.h"
1054 +
1055 +/*
1056 + * These are rather arbitrary. They are fairly large because adjacent requests
1057 + * pulled from a communication ring are quite likely to end up being part of
1058 + * the same scatter/gather request at the disc.
1059 + * 
1060 + * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
1061 + * 
1062 + * This will increase the chances of being able to write whole tracks.
1063 + * 64 should be enough to keep us competitive with Linux.
1064 + */
1065 +static int blkif_reqs = 64;
1066 +module_param_named(reqs, blkif_reqs, int, 0);
1067 +MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
1068 +
1069 +/* Run-time switchable: /sys/module/blkback/parameters/ */
1070 +static unsigned int log_stats = 0;
1071 +static unsigned int debug_lvl = 0;
1072 +module_param(log_stats, int, 0644);
1073 +module_param(debug_lvl, int, 0644);
1074 +
1075 +/*
1076 + * Each outstanding request that we've passed to the lower device layers has a 
1077 + * 'pending_req' allocated to it. Each bio that completes decrements
1078 + * the pendcnt towards zero. When it hits zero, the specified domain has a 
1079 + * response queued for it, with the saved 'id' passed back.
1080 + */
1081 +typedef struct {
1082 +       blkif_t       *blkif;
1083 +       u64            id;
1084 +       int            nr_pages;
1085 +       atomic_t       pendcnt;
1086 +       unsigned short operation;
1087 +       int            status;
1088 +       struct list_head free_list;
1089 +} pending_req_t;
1090 +
1091 +static pending_req_t *pending_reqs;
1092 +static struct list_head pending_free;
1093 +static DEFINE_SPINLOCK(pending_free_lock);
1094 +static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
1095 +
1096 +#define BLKBACK_INVALID_HANDLE (~0)
1097 +
1098 +static struct page **pending_pages;
1099 +static grant_handle_t *pending_grant_handles;
1100 +
1101 +static inline int vaddr_pagenr(pending_req_t *req, int seg)
1102 +{
1103 +       return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
1104 +}
1105 +
1106 +#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
1107 +
1108 +static inline unsigned long vaddr(pending_req_t *req, int seg)
1109 +{
1110 +       unsigned long pfn = page_to_pfn(pending_page(req, seg));
1111 +       return (unsigned long)pfn_to_kaddr(pfn);
1112 +}
1113 +
1114 +#define pending_handle(_req, _seg) \
1115 +       (pending_grant_handles[vaddr_pagenr(_req, _seg)])
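
vaddr_pagenr() flattens (request, segment) into a single index so one pages array and one grant-handle array can back every in-flight request. With a BLKIF_MAX_SEGMENTS_PER_REQUEST of 11 (assumed here; the constant comes from the blkif interface headers), request 3, segment 5 lands in slot 38:

    #include <assert.h>

    #define MAX_SEGS 11  /* stand-in for BLKIF_MAX_SEGMENTS_PER_REQUEST */

    static int pagenr(int req, int seg)
    {
            return req * MAX_SEGS + seg;  /* same scheme as vaddr_pagenr() */
    }

    int main(void)
    {
            assert(pagenr(3, 5) == 38);
            return 0;
    }
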
1116 +
1117 +
1118 +static int do_block_io_op(blkif_t *blkif);
1119 +static int dispatch_rw_block_io(blkif_t *blkif,
1120 +                                blkif_request_t *req,
1121 +                                pending_req_t *pending_req);
1122 +static void make_response(blkif_t *blkif, u64 id,
1123 +                         unsigned short op, int st);
1124 +
1125 +/******************************************************************
1126 + * misc small helpers
1127 + */
1128 +static pending_req_t* alloc_req(void)
1129 +{
1130 +       pending_req_t *req = NULL;
1131 +       unsigned long flags;
1132 +
1133 +       spin_lock_irqsave(&pending_free_lock, flags);
1134 +       if (!list_empty(&pending_free)) {
1135 +               req = list_entry(pending_free.next, pending_req_t, free_list);
1136 +               list_del(&req->free_list);
1137 +       }
1138 +       spin_unlock_irqrestore(&pending_free_lock, flags);
1139 +       return req;
1140 +}
1141 +
1142 +static void free_req(pending_req_t *req)
1143 +{
1144 +       unsigned long flags;
1145 +       int was_empty;
1146 +
1147 +       spin_lock_irqsave(&pending_free_lock, flags);
1148 +       was_empty = list_empty(&pending_free);
1149 +       list_add(&req->free_list, &pending_free);
1150 +       spin_unlock_irqrestore(&pending_free_lock, flags);
1151 +       if (was_empty)
1152 +               wake_up(&pending_free_wq);
1153 +}
1154 +
1155 +static void unplug_queue(blkif_t *blkif)
1156 +{
1157 +       if (blkif->plug == NULL)
1158 +               return;
1159 +       if (blkif->plug->unplug_fn)
1160 +               blkif->plug->unplug_fn(blkif->plug);
1161 +       blk_put_queue(blkif->plug);
1162 +       blkif->plug = NULL;
1163 +}
1164 +
1165 +static void plug_queue(blkif_t *blkif, struct block_device *bdev)
1166 +{
1167 +       request_queue_t *q = bdev_get_queue(bdev);
1168 +
1169 +       if (q == blkif->plug)
1170 +               return;
1171 +       unplug_queue(blkif);
1172 +       blk_get_queue(q);
1173 +       blkif->plug = q;
1174 +}
1175 +
1176 +static void fast_flush_area(pending_req_t *req)
1177 +{
1178 +       struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1179 +       unsigned int i, invcount = 0;
1180 +       grant_handle_t handle;
1181 +       int ret;
1182 +
1183 +       for (i = 0; i < req->nr_pages; i++) {
1184 +               handle = pending_handle(req, i);
1185 +               if (handle == BLKBACK_INVALID_HANDLE)
1186 +                       continue;
1187 +               blkback_pagemap_clear(pending_page(req, i));
1188 +               gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
1189 +                                   GNTMAP_host_map, handle);
1190 +               pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
1191 +               invcount++;
1192 +       }
1193 +
1194 +       ret = HYPERVISOR_grant_table_op(
1195 +               GNTTABOP_unmap_grant_ref, unmap, invcount);
1196 +       BUG_ON(ret);
1197 +}
1198 +
1199 +/******************************************************************
1200 + * SCHEDULER FUNCTIONS
1201 + */
1202 +
1203 +static void print_stats(blkif_t *blkif)
1204 +{
1205 +       printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d  |  br %4d\n",
1206 +              current->comm, blkif->st_oo_req,
1207 +              blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
1208 +       blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
1209 +       blkif->st_rd_req = 0;
1210 +       blkif->st_wr_req = 0;
1211 +       blkif->st_oo_req = 0;
1212 +}
1213 +
1214 +int blkif_schedule(void *arg)
1215 +{
1216 +       blkif_t *blkif = arg;
1217 +       struct vbd *vbd = &blkif->vbd;
1218 +
1219 +       blkif_get(blkif);
1220 +
1221 +       if (debug_lvl)
1222 +               printk(KERN_DEBUG "%s: started\n", current->comm);
1223 +
1224 +       while (!kthread_should_stop()) {
1225 +               if (try_to_freeze())
1226 +                       continue;
1227 +               if (unlikely(vbd->size != vbd_size(vbd)))
1228 +                       vbd_resize(blkif);
1229 +
1230 +               wait_event_interruptible(
1231 +                       blkif->wq,
1232 +                       blkif->waiting_reqs || kthread_should_stop());
1233 +               wait_event_interruptible(
1234 +                       pending_free_wq,
1235 +                       !list_empty(&pending_free) || kthread_should_stop());
1236 +
1237 +               blkif->waiting_reqs = 0;
1238 +               smp_mb(); /* clear flag *before* checking for work */
1239 +
1240 +               if (do_block_io_op(blkif))
1241 +                       blkif->waiting_reqs = 1;
1242 +               unplug_queue(blkif);
1243 +
1244 +               if (log_stats && time_after(jiffies, blkif->st_print))
1245 +                       print_stats(blkif);
1246 +       }
1247 +
1248 +       if (log_stats)
1249 +               print_stats(blkif);
1250 +       if (debug_lvl)
1251 +               printk(KERN_DEBUG "%s: exiting\n", current->comm);
1252 +
1253 +       blkif->xenblkd = NULL;
1254 +       blkif_put(blkif);
1255 +
1256 +       return 0;
1257 +}
1258 +
1259 +/******************************************************************
1260 + * COMPLETION CALLBACK -- Called as bio->bi_end_io()
1261 + */
1262 +
1263 +static void __end_block_io_op(pending_req_t *pending_req, int error)
1264 +{
1265 +       /* An error fails the entire request. */
1266 +       if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
1267 +           (error == -EOPNOTSUPP)) {
1268 +               DPRINTK("blkback: write barrier op failed, not supported\n");
1269 +               blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
1270 +               pending_req->status = BLKIF_RSP_EOPNOTSUPP;
1271 +       } else if (error) {
1272 +               DPRINTK("Buffer not up-to-date at end of operation, "
1273 +                       "error=%d\n", error);
1274 +               pending_req->status = BLKIF_RSP_ERROR;
1275 +       }
1276 +
1277 +       if (atomic_dec_and_test(&pending_req->pendcnt)) {
1278 +               fast_flush_area(pending_req);
1279 +               make_response(pending_req->blkif, pending_req->id,
1280 +                             pending_req->operation, pending_req->status);
1281 +               blkif_put(pending_req->blkif);
1282 +               free_req(pending_req);
1283 +       }
1284 +}
1285 +
1286 +static int end_block_io_op(struct bio *bio, unsigned int done, int error)
1287 +{
1288 +       if (bio->bi_size != 0)
1289 +               return 1;
1290 +       __end_block_io_op(bio->bi_private, error);
1291 +       bio_put(bio);
1292 +       return error;
1293 +}
1294 +
1295 +
1296 +/******************************************************************************
1297 + * NOTIFICATION FROM GUEST OS.
1298 + */
1299 +
1300 +static void blkif_notify_work(blkif_t *blkif)
1301 +{
1302 +       blkif->waiting_reqs = 1;
1303 +       wake_up(&blkif->wq);
1304 +}
1305 +
1306 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
1307 +{
1308 +       blkif_notify_work(dev_id);
1309 +       return IRQ_HANDLED;
1310 +}
1311 +
1312 +
1314 +/******************************************************************
1315 + * DOWNWARD CALLS -- These interface with the block-device layer proper.
1316 + */
1317 +
1318 +static int do_block_io_op(blkif_t *blkif)
1319 +{
1320 +       blkif_back_rings_t *blk_rings = &blkif->blk_rings;
1321 +       blkif_request_t req;
1322 +       pending_req_t *pending_req;
1323 +       RING_IDX rc, rp;
1324 +       int more_to_do = 0, ret;
1325 +
1326 +       rc = blk_rings->common.req_cons;
1327 +       rp = blk_rings->common.sring->req_prod;
1328 +       rmb(); /* Ensure we see queued requests up to 'rp'. */
1329 +
1330 +       while ((rc != rp) || (blkif->is_suspended_req)) {
1331 +
1332 +               if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
1333 +                       break;
1334 +
1335 +               if (kthread_should_stop()) {
1336 +                       more_to_do = 1;
1337 +                       break;
1338 +               }
1339 +
1340 +               pending_req = alloc_req();
1341 +               if (NULL == pending_req) {
1342 +                       blkif->st_oo_req++;
1343 +                       more_to_do = 1;
1344 +                       break;
1345 +               }
1346 +
1347 +               /* Handle the suspended request first, if one
1348 +                * exists. */
1349 +               if (blkif->is_suspended_req) {
1350 +                       memcpy(&req, &blkif->suspended_req, sizeof(req));
1351 +                       blkif->is_suspended_req = 0;
1352 +                       goto handle_request;
1353 +               }
1354 +
1355 +               switch (blkif->blk_protocol) {
1356 +               case BLKIF_PROTOCOL_NATIVE:
1357 +                       memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
1358 +                       break;
1359 +               case BLKIF_PROTOCOL_X86_32:
1360 +                       blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
1361 +                       break;
1362 +               case BLKIF_PROTOCOL_X86_64:
1363 +                       blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
1364 +                       break;
1365 +               default:
1366 +                       BUG();
1367 +               }
1368 +               blk_rings->common.req_cons = ++rc; /* before make_response() */
1369 +
1370 +               /* Apply all sanity checks to /private copy/ of request. */
1371 +               barrier();
1372 +
1373 +handle_request:
1374 +               ret = 0;
1375 +               switch (req.operation) {
1376 +               case BLKIF_OP_READ:
1377 +                       blkif->st_rd_req++;
1378 +                       ret = dispatch_rw_block_io(blkif, &req, pending_req); 
1379 +                       break;
1380 +               case BLKIF_OP_WRITE_BARRIER:
1381 +                       blkif->st_br_req++;
1382 +                       /* fall through */
1383 +               case BLKIF_OP_WRITE:
1384 +                       blkif->st_wr_req++;
1385 +                       ret = dispatch_rw_block_io(blkif, &req, pending_req);
1386 +                       break;
1387 +               default:
1388 +                       /* A good sign something is wrong: sleep for a while to
1389 +                        * avoid excessive CPU consumption by a bad guest. */
1390 +                       msleep(1);
1391 +                       DPRINTK("error: unknown block io operation [%d]\n",
1392 +                               req.operation);
1393 +                       make_response(blkif, req.id, req.operation,
1394 +                                     BLKIF_RSP_ERROR);
1395 +                       free_req(pending_req);
1396 +                       break;
1397 +               }
1398 +               BUG_ON(ret != 0 && ret != -EAGAIN);
1399 +
1400 +               /* If the request can't be handled right now, save it and
1401 +                * break out of the loop. */
1402 +               if (ret == -EAGAIN) {
1403 +                       memcpy(&blkif->suspended_req, &req, sizeof(req));
1404 +                       blkif->is_suspended_req = 1;
1405 +                       /* Report "no more work pending"; the restart is
1406 +                        * handled out of band. */
1407 +                       return 0;
1408 +               }
1409 +
1410 +               /* Yield point for this unbounded loop. */
1411 +               cond_resched();
1412 +       }
1413 +
1414 +       return more_to_do;
1415 +}
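
The loop above is the consumer half of a single-producer/single-consumer ring: read req_prod, issue a read barrier so slot contents are not read ahead of the index, then consume up to that point. A minimal userspace model of the index protocol (the real ring.h macros add wrap handling and the overflow check used above):

    #include <assert.h>
    #include <string.h>

    #define RING_SIZE 4

    struct demo_ring {
            unsigned int req_prod, req_cons;
            int slot[RING_SIZE];
    };

    static int consume_one(struct demo_ring *r, int *out)
    {
            unsigned int cons = r->req_cons, prod = r->req_prod;

            /* On SMP, an rmb() would go here, as in do_block_io_op(). */
            if (cons == prod)
                    return 0;                        /* ring is empty */
            *out = r->slot[cons % RING_SIZE];        /* private copy first */
            r->req_cons = cons + 1;                  /* then advance */
            return 1;
    }

    int main(void)
    {
            struct demo_ring r;
            int v;

            memset(&r, 0, sizeof(r));
            r.slot[0] = 42;
            r.req_prod = 1;
            assert(consume_one(&r, &v) && v == 42);
            assert(!consume_one(&r, &v));
            return 0;
    }
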
1416 +
1417 +static int dispatch_rw_block_io(blkif_t *blkif,
1418 +                                blkif_request_t *req,
1419 +                                pending_req_t *pending_req)
1420 +{
1421 +       extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
1422 +       struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1423 +       struct phys_req preq;
1424 +       struct { 
1425 +               unsigned long buf; unsigned int nsec;
1426 +       } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1427 +       unsigned int nseg;
1428 +       struct bio *bio = NULL;
1429 +       int ret, i;
1430 +       int operation;
1431 +
1432 +       switch (req->operation) {
1433 +       case BLKIF_OP_READ:
1434 +               operation = READ;
1435 +               break;
1436 +       case BLKIF_OP_WRITE:
1437 +               operation = WRITE;
1438 +               break;
1439 +       case BLKIF_OP_WRITE_BARRIER:
1440 +               operation = WRITE_BARRIER;
1441 +               break;
1442 +       default:
1443 +               operation = 0; /* make gcc happy */
1444 +               BUG();
1445 +       }
1446 +
1447 +       /* Check that number of segments is sane. */
1448 +       nseg = req->nr_segments;
1449 +       if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || 
1450 +           unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
1451 +               DPRINTK("Bad number of segments in request (%d)\n", nseg);
1452 +               goto fail_response;
1453 +       }
1454 +
1455 +       preq.dev           = req->handle;
1456 +       preq.sector_number = req->sector_number;
1457 +       preq.nr_sects      = 0;
1458 +
1459 +       pending_req->blkif     = blkif;
1460 +       pending_req->id        = req->id;
1461 +       pending_req->operation = req->operation;
1462 +       pending_req->status    = BLKIF_RSP_OKAY;
1463 +       pending_req->nr_pages  = nseg;
1464 +
1465 +       for (i = 0; i < nseg; i++) {
1466 +               uint32_t flags;
1467 +
1468 +               seg[i].nsec = req->seg[i].last_sect -
1469 +                       req->seg[i].first_sect + 1;
1470 +
1471 +               if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
1472 +                   (req->seg[i].last_sect < req->seg[i].first_sect))
1473 +                       goto fail_response;
1474 +               preq.nr_sects += seg[i].nsec;
1475 +
1476 +               flags = GNTMAP_host_map;
1477 +               if (operation != READ)
1478 +                       flags |= GNTMAP_readonly;
1479 +               gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
1480 +                                 req->seg[i].gref, blkif->domid);
1481 +       }
1482 +
1483 +       ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
1484 +       BUG_ON(ret);
1485 +
1486 +#define GENERAL_ERR   (1<<0)
1487 +#define EAGAIN_ERR    (1<<1)
1488 +       for (i = 0; i < nseg; i++) {
1489 +               if (unlikely(map[i].status != 0)) {
1490 +                       DPRINTK("invalid buffer -- could not remap it\n");
1491 +                       map[i].handle = BLKBACK_INVALID_HANDLE;
1492 +                       ret |= GENERAL_ERR;
1493 +                       if (map[i].status == GNTST_eagain)
1494 +                               ret |= EAGAIN_ERR;
1495 +               } else {
1496 +                       blkback_pagemap_set(vaddr_pagenr(pending_req, i),
1497 +                                           pending_page(pending_req, i),
1498 +                                           blkif->domid, req->handle,
1499 +                                           req->seg[i].gref);
1500 +               }
1501 +
1502 +               pending_handle(pending_req, i) = map[i].handle;
1503 +
1504 +               if (ret)
1505 +                       continue;
1506 +
1507 +               set_phys_to_machine(
1508 +                       page_to_pfn(pending_page(pending_req, i)),
1509 +                       FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
1510 +               seg[i].buf  = map[i].dev_bus_addr | 
1511 +                       (req->seg[i].first_sect << 9);
1512 +       }
1513 +
1514 +	/* If any of the grant maps failed with GNTST_eagain, clean up and
1515 +	 * ask the caller to suspend this request and retry it later. */
1516 +	if (ret & EAGAIN_ERR) {
1517 +		fast_flush_area(pending_req);
1518 +		free_req(pending_req);
1519 +		return -EAGAIN;
1520 +	}
1521 +
1522 +       if (ret)
1523 +               goto fail_flush;
1524 +
1525 +       if (vbd_translate(&preq, blkif, operation) != 0) {
1526 +               DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
1527 +                       operation == READ ? "read" : "write",
1528 +                       preq.sector_number,
1529 +                       preq.sector_number + preq.nr_sects, preq.dev);
1530 +               goto fail_flush;
1531 +       }
1532 +
1533 +       plug_queue(blkif, preq.bdev);
1534 +       atomic_set(&pending_req->pendcnt, 1);
1535 +       blkif_get(blkif);
1536 +
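+	/* pendcnt starts at 1 to cover the final bio submitted after the
+	 * loop; each extra bio submitted inside the loop increments it, so
+	 * the request only completes once every bio has finished. */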
1537 +       for (i = 0; i < nseg; i++) {
1538 +               if (((int)preq.sector_number|(int)seg[i].nsec) &
1539 +                   ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
1540 +                       DPRINTK("Misaligned I/O request from domain %d",
1541 +                               blkif->domid);
1542 +                       goto fail_put_bio;
1543 +               }
1544 +
1545 +               while ((bio == NULL) ||
1546 +                      (bio_add_page(bio,
1547 +                                    pending_page(pending_req, i),
1548 +                                    seg[i].nsec << 9,
1549 +                                    seg[i].buf & ~PAGE_MASK) == 0)) {
1550 +                       if (bio) {
1551 +                               atomic_inc(&pending_req->pendcnt);
1552 +                               submit_bio(operation, bio);
1553 +                       }
1554 +
1555 +                       bio = bio_alloc(GFP_KERNEL, nseg-i);
1556 +                       if (unlikely(bio == NULL))
1557 +                               goto fail_put_bio;
1558 +
1559 +                       bio->bi_bdev    = preq.bdev;
1560 +                       bio->bi_private = pending_req;
1561 +                       bio->bi_end_io  = end_block_io_op;
1562 +                       bio->bi_sector  = preq.sector_number;
1563 +               }
1564 +
1565 +               preq.sector_number += seg[i].nsec;
1566 +       }
1567 +
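+	/* A barrier with no segments still needs an (empty) bio so the
+	 * barrier request itself reaches the block layer. */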
1568 +       if (!bio) {
1569 +               BUG_ON(operation != WRITE_BARRIER);
1570 +               bio = bio_alloc(GFP_KERNEL, 0);
1571 +               if (unlikely(bio == NULL))
1572 +                       goto fail_put_bio;
1573 +
1574 +               bio->bi_bdev    = preq.bdev;
1575 +               bio->bi_private = pending_req;
1576 +               bio->bi_end_io  = end_block_io_op;
1577 +               bio->bi_sector  = -1;
1578 +       }
1579 +
1580 +       submit_bio(operation, bio);
1581 +
1582 +       if (operation == READ)
1583 +               blkif->st_rd_sect += preq.nr_sects;
1584 +       else if (operation == WRITE || operation == WRITE_BARRIER)
1585 +               blkif->st_wr_sect += preq.nr_sects;
1586 +
1587 +       return 0;
1588 +
1589 + fail_flush:
1590 +       fast_flush_area(pending_req);
1591 + fail_response:
1592 +       make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
1593 +       free_req(pending_req);
1594 +       msleep(1); /* back off a bit */
1595 +       return 0;
1596 +
1597 + fail_put_bio:
1598 +       __end_block_io_op(pending_req, -EINVAL);
1599 +       if (bio)
1600 +               bio_put(bio);
1601 +       unplug_queue(blkif);
1602 +       msleep(1); /* back off a bit */
1603 +       return 0;
1604 +}
1605 +
1606 +
1607 +
1608 +/******************************************************************
1609 + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
1610 + */
1611 +
1612 +
1613 +static void make_response(blkif_t *blkif, u64 id,
1614 +                         unsigned short op, int st)
1615 +{
1616 +       blkif_response_t  resp;
1617 +       unsigned long     flags;
1618 +       blkif_back_rings_t *blk_rings = &blkif->blk_rings;
1619 +       int more_to_do = 0;
1620 +       int notify;
1621 +
1622 +       resp.id        = id;
1623 +       resp.operation = op;
1624 +       resp.status    = st;
1625 +
1626 +       spin_lock_irqsave(&blkif->blk_ring_lock, flags);
1627 +       /* Place on the response ring for the relevant domain. */
1628 +       switch (blkif->blk_protocol) {
1629 +       case BLKIF_PROTOCOL_NATIVE:
1630 +               memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
1631 +                      &resp, sizeof(resp));
1632 +               break;
1633 +       case BLKIF_PROTOCOL_X86_32:
1634 +               memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
1635 +                      &resp, sizeof(resp));
1636 +               break;
1637 +       case BLKIF_PROTOCOL_X86_64:
1638 +               memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
1639 +                      &resp, sizeof(resp));
1640 +               break;
1641 +       default:
1642 +               BUG();
1643 +       }
1644 +       blk_rings->common.rsp_prod_pvt++;
1645 +       RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
1646 +       if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
1647 +               /*
1648 +                * Tail check for pending requests. Allows frontend to avoid
1649 +                * notifications if requests are already in flight (lower
1650 +                * overheads and promotes batching).
1651 +                */
1652 +               RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
1653 +
1654 +       } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
1655 +               more_to_do = 1;
1656 +       }
1657 +
1658 +       spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
1659 +
1660 +       if (more_to_do)
1661 +               blkif_notify_work(blkif);
1662 +       if (notify)
1663 +               notify_remote_via_irq(blkif->irq);
1664 +}
1665 +
1666 +static int __init blkif_init(void)
1667 +{
1668 +       int i, mmap_pages;
1669 +
1670 +       if (!is_running_on_xen())
1671 +               return -ENODEV;
1672 +
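+	/* One page per potential segment of every in-flight request. */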
1673 +       mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
1674 +
1675 +       pending_reqs          = kmalloc(sizeof(pending_reqs[0]) *
1676 +                                       blkif_reqs, GFP_KERNEL);
1677 +       pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
1678 +                                       mmap_pages, GFP_KERNEL);
1679 +       pending_pages         = alloc_empty_pages_and_pagevec(mmap_pages);
1680 +
1681 +       if (blkback_pagemap_init(mmap_pages))
1682 +               goto out_of_memory;
1683 +
1684 +       if (!pending_reqs || !pending_grant_handles || !pending_pages)
1685 +               goto out_of_memory;
1686 +
1687 +       for (i = 0; i < mmap_pages; i++)
1688 +               pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
1689 +
1690 +       blkif_interface_init();
1691 +
1692 +	memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs);
1693 +       INIT_LIST_HEAD(&pending_free);
1694 +
1695 +       for (i = 0; i < blkif_reqs; i++)
1696 +               list_add_tail(&pending_reqs[i].free_list, &pending_free);
1697 +
1698 +       blkif_xenbus_init();
1699 +
1700 +       return 0;
1701 +
1702 + out_of_memory:
1703 +       kfree(pending_reqs);
1704 +       kfree(pending_grant_handles);
1705 +       free_empty_pages_and_pagevec(pending_pages, mmap_pages);
1706 +	printk(KERN_ERR "%s: out of memory\n", __FUNCTION__);
1707 +       return -ENOMEM;
1708 +}
1709 +
1710 +module_init(blkif_init);
1711 +
1712 +MODULE_LICENSE("Dual BSD/GPL");
1713 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1714 +++ sle11sp1-2010-03-29/drivers/xen/blkback/blkback-pagemap.c   2009-06-09 15:01:37.000000000 +0200
1715 @@ -0,0 +1,96 @@
1716 +#include <linux/module.h>
1717 +#include "blkback-pagemap.h"
1718 +
1719 +static int blkback_pagemap_size;
1720 +static struct blkback_pagemap *blkback_pagemap;
1721 +
1722 +static inline int
1723 +blkback_pagemap_entry_clear(struct blkback_pagemap *map)
1724 +{
1725 +       static struct blkback_pagemap zero;
1726 +       return !memcmp(map, &zero, sizeof(zero));
1727 +}
1728 +
1729 +int
1730 +blkback_pagemap_init(int pages)
1731 +{
1732 +       blkback_pagemap = kzalloc(pages * sizeof(struct blkback_pagemap),
1733 +                                 GFP_KERNEL);
1734 +       if (!blkback_pagemap)
1735 +               return -ENOMEM;
1736 +
1737 +       blkback_pagemap_size = pages;
1738 +       return 0;
1739 +}
1740 +EXPORT_SYMBOL_GPL(blkback_pagemap_init);
1741 +
1742 +void
1743 +blkback_pagemap_set(int idx, struct page *page,
1744 +                   domid_t domid, busid_t busid, grant_ref_t gref)
1745 +{
1746 +       struct blkback_pagemap *entry;
1747 +
1748 +       BUG_ON(!blkback_pagemap);
1749 +       BUG_ON(idx >= blkback_pagemap_size);
1750 +
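+	/* Tag the page and stash the map index in page->private so the
+	 * entry can later be recovered from the struct page alone. */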
1751 +       SetPageBlkback(page);
1752 +       set_page_private(page, idx);
1753 +
1754 +       entry = blkback_pagemap + idx;
1755 +       if (!blkback_pagemap_entry_clear(entry)) {
1756 +		printk(KERN_CRIT "overwriting pagemap %d: d %u b %u g %u\n",
1757 +		       idx, entry->domid, entry->busid, entry->gref);
1758 +               BUG();
1759 +       }
1760 +
1761 +       entry->domid = domid;
1762 +       entry->busid = busid;
1763 +       entry->gref  = gref;
1764 +}
1765 +EXPORT_SYMBOL_GPL(blkback_pagemap_set);
1766 +
1767 +void
1768 +blkback_pagemap_clear(struct page *page)
1769 +{
1770 +       int idx;
1771 +       struct blkback_pagemap *entry;
1772 +
1773 +       idx = (int)page_private(page);
1774 +
1775 +       BUG_ON(!blkback_pagemap);
1776 +       BUG_ON(!PageBlkback(page));
1777 +       BUG_ON(idx >= blkback_pagemap_size);
1778 +
1779 +       entry = blkback_pagemap + idx;
1780 +       if (blkback_pagemap_entry_clear(entry)) {
1781 +		printk(KERN_CRIT "clearing empty pagemap %d\n", idx);
1782 +               BUG();
1783 +       }
1784 +
1785 +       memset(entry, 0, sizeof(*entry));
1786 +}
1787 +EXPORT_SYMBOL_GPL(blkback_pagemap_clear);
1788 +
1789 +struct blkback_pagemap
1790 +blkback_pagemap_read(struct page *page)
1791 +{
1792 +       int idx;
1793 +       struct blkback_pagemap *entry;
1794 +
1795 +       idx = (int)page_private(page);
1796 +
1797 +       BUG_ON(!blkback_pagemap);
1798 +       BUG_ON(!PageBlkback(page));
1799 +       BUG_ON(idx >= blkback_pagemap_size);
1800 +
1801 +       entry = blkback_pagemap + idx;
1802 +       if (blkback_pagemap_entry_clear(entry)) {
1803 +		printk(KERN_CRIT "reading empty pagemap %d\n", idx);
1804 +               BUG();
1805 +       }
1806 +
1807 +       return *entry;
1808 +}
1809 +EXPORT_SYMBOL(blkback_pagemap_read);
1810 +
1811 +MODULE_LICENSE("Dual BSD/GPL");
1812 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1813 +++ sle11sp1-2010-03-29/drivers/xen/blkback/blkback-pagemap.h   2009-06-09 15:01:37.000000000 +0200
1814 @@ -0,0 +1,37 @@
1815 +#ifndef _BLKBACK_PAGEMAP_H_
1816 +#define _BLKBACK_PAGEMAP_H_
1817 +
1818 +#include <linux/mm.h>
1819 +#include <xen/interface/xen.h>
1820 +#include <xen/interface/grant_table.h>
1821 +
1822 +typedef unsigned int busid_t;
1823 +
1824 +struct blkback_pagemap {
1825 +       domid_t          domid;
1826 +       busid_t          busid;
1827 +       grant_ref_t      gref;
1828 +};
1829 +
1830 +#if defined(CONFIG_XEN_BLKBACK_PAGEMAP) || defined(CONFIG_XEN_BLKBACK_PAGEMAP_MODULE)
1831 +
1832 +int blkback_pagemap_init(int);
1833 +void blkback_pagemap_set(int, struct page *, domid_t, busid_t, grant_ref_t);
1834 +void blkback_pagemap_clear(struct page *);
1835 +struct blkback_pagemap blkback_pagemap_read(struct page *);
1836 +
1837 +#else /* CONFIG_XEN_BLKBACK_PAGEMAP */
1838 +
1839 +static inline int blkback_pagemap_init(int pages) { return 0; }
1840 +static inline void blkback_pagemap_set(int idx, struct page *page, domid_t dom,
1841 +                                      busid_t bus, grant_ref_t gnt) {}
1842 +static inline void blkback_pagemap_clear(struct page *page) {}
1843 +static inline struct blkback_pagemap blkback_pagemap_read(struct page *page)
1844 +{
1845 +       BUG();
1846 +       return (struct blkback_pagemap){-1, -1, -1};
1847 +}
1848 +
1849 +#endif /* CONFIG_XEN_BLKBACK_PAGEMAP */
1850 +
1851 +#endif /* _BLKBACK_PAGEMAP_H_ */
1852 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1853 +++ sle11sp1-2010-03-29/drivers/xen/blkback/common.h    2010-03-22 12:00:53.000000000 +0100
1854 @@ -0,0 +1,155 @@
1855 +/* 
1856 + * This program is free software; you can redistribute it and/or
1857 + * modify it under the terms of the GNU General Public License version 2
1858 + * as published by the Free Software Foundation; or, when distributed
1859 + * separately from the Linux kernel or incorporated into other
1860 + * software packages, subject to the following license:
1861 + * 
1862 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1863 + * of this source file (the "Software"), to deal in the Software without
1864 + * restriction, including without limitation the rights to use, copy, modify,
1865 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1866 + * and to permit persons to whom the Software is furnished to do so, subject to
1867 + * the following conditions:
1868 + * 
1869 + * The above copyright notice and this permission notice shall be included in
1870 + * all copies or substantial portions of the Software.
1871 + * 
1872 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1873 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1874 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1875 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1876 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1877 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1878 + * IN THE SOFTWARE.
1879 + */
1880 +
1881 +#ifndef __BLKIF__BACKEND__COMMON_H__
1882 +#define __BLKIF__BACKEND__COMMON_H__
1883 +
1884 +#include <linux/version.h>
1885 +#include <linux/module.h>
1886 +#include <linux/interrupt.h>
1887 +#include <linux/slab.h>
1888 +#include <linux/blkdev.h>
1889 +#include <linux/vmalloc.h>
1890 +#include <linux/wait.h>
1891 +#include <asm/io.h>
1892 +#include <asm/setup.h>
1893 +#include <asm/pgalloc.h>
1894 +#include <xen/evtchn.h>
1895 +#include <asm/hypervisor.h>
1896 +#include <xen/blkif.h>
1897 +#include <xen/gnttab.h>
1898 +#include <xen/driver_util.h>
1899 +#include <xen/xenbus.h>
1900 +#include "blkback-pagemap.h"
1901 +
1902 +
1903 +#define DPRINTK(_f, _a...)                     \
1904 +       pr_debug("(file=%s, line=%d) " _f,      \
1905 +                __FILE__ , __LINE__ , ## _a )
1906 +
1907 +struct vbd {
1908 +       blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
1909 +       unsigned char  readonly;    /* Non-zero -> read-only */
1910 +       unsigned char  type;        /* VDISK_xxx */
1911 +       u32            pdevice;     /* phys device that this vbd maps to */
1912 +       struct block_device *bdev;
1913 +       sector_t       size;        /* Cached size parameter */
1914 +};
1915 +
1916 +struct backend_info;
1917 +
1918 +typedef struct blkif_st {
1919 +       /* Unique identifier for this interface. */
1920 +       domid_t           domid;
1921 +       unsigned int      handle;
1922 +       /* Physical parameters of the comms window. */
1923 +       unsigned int      irq;
1924 +       /* Comms information. */
1925 +       enum blkif_protocol blk_protocol;
1926 +       blkif_back_rings_t blk_rings;
1927 +       struct vm_struct *blk_ring_area;
1928 +       /* The VBD attached to this interface. */
1929 +       struct vbd        vbd;
1930 +       /* Back pointer to the backend_info. */
1931 +       struct backend_info *be;
1932 +       /* Private fields. */
1933 +       spinlock_t       blk_ring_lock;
1934 +       atomic_t         refcnt;
1935 +
1936 +       wait_queue_head_t   wq;
1937 +       struct task_struct  *xenblkd;
1938 +       unsigned int        waiting_reqs;
1939 +       request_queue_t     *plug;
1940 +	int                 is_suspended_req;
1941 +	blkif_request_t     suspended_req;
1942 +
1943 +       /* statistics */
1944 +       unsigned long       st_print;
1945 +       int                 st_rd_req;
1946 +       int                 st_wr_req;
1947 +       int                 st_oo_req;
1948 +       int                 st_br_req;
1949 +       int                 st_rd_sect;
1950 +       int                 st_wr_sect;
1951 +
1952 +       wait_queue_head_t waiting_to_free;
1953 +
1954 +       grant_handle_t shmem_handle;
1955 +       grant_ref_t    shmem_ref;
1956 +} blkif_t;
1957 +
1958 +struct backend_info
1959 +{
1960 +       struct xenbus_device *dev;
1961 +       blkif_t *blkif;
1962 +       struct xenbus_watch backend_watch;
1963 +       unsigned major;
1964 +       unsigned minor;
1965 +       char *mode;
1966 +};
1967 +
1968 +blkif_t *blkif_alloc(domid_t domid);
1969 +void blkif_disconnect(blkif_t *blkif);
1970 +void blkif_free(blkif_t *blkif);
1971 +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
1972 +void vbd_resize(blkif_t *blkif);
1973 +
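+/* The last blkif_put() wakes blkif_disconnect(), which waits on
+ * waiting_to_free until every outstanding reference is gone. */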
1974 +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
1975 +#define blkif_put(_b)                                  \
1976 +       do {                                            \
1977 +               if (atomic_dec_and_test(&(_b)->refcnt)) \
1978 +                       wake_up(&(_b)->waiting_to_free);\
1979 +       } while (0)
1980 +
1981 +/* Create a vbd. */
1982 +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
1983 +              unsigned minor, int readonly, int cdrom);
1984 +void vbd_free(struct vbd *vbd);
1985 +
1986 +unsigned long long vbd_size(struct vbd *vbd);
1987 +unsigned int vbd_info(struct vbd *vbd);
1988 +unsigned long vbd_secsize(struct vbd *vbd);
1989 +
1990 +struct phys_req {
1991 +       unsigned short       dev;
1992 +       unsigned short       nr_sects;
1993 +       struct block_device *bdev;
1994 +       blkif_sector_t       sector_number;
1995 +};
1996 +
1997 +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
1998 +
1999 +void blkif_interface_init(void);
2000 +
2001 +void blkif_xenbus_init(void);
2002 +
2003 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
2004 +int blkif_schedule(void *arg);
2005 +
2006 +int blkback_barrier(struct xenbus_transaction xbt,
2007 +                   struct backend_info *be, int state);
2008 +
2009 +#endif /* __BLKIF__BACKEND__COMMON_H__ */
2010 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
2011 +++ sle11sp1-2010-03-29/drivers/xen/blkback/interface.c 2010-01-04 11:56:34.000000000 +0100
2012 @@ -0,0 +1,185 @@
2013 +/******************************************************************************
2014 + * arch/xen/drivers/blkif/backend/interface.c
2015 + * 
2016 + * Block-device interface management.
2017 + * 
2018 + * Copyright (c) 2004, Keir Fraser
2019 + * 
2020 + * This program is free software; you can redistribute it and/or
2021 + * modify it under the terms of the GNU General Public License version 2
2022 + * as published by the Free Software Foundation; or, when distributed
2023 + * separately from the Linux kernel or incorporated into other
2024 + * software packages, subject to the following license:
2025 + * 
2026 + * Permission is hereby granted, free of charge, to any person obtaining a copy
2027 + * of this source file (the "Software"), to deal in the Software without
2028 + * restriction, including without limitation the rights to use, copy, modify,
2029 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
2030 + * and to permit persons to whom the Software is furnished to do so, subject to
2031 + * the following conditions:
2032 + * 
2033 + * The above copyright notice and this permission notice shall be included in
2034 + * all copies or substantial portions of the Software.
2035 + * 
2036 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2037 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2038 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2039 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2040 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2041 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2042 + * IN THE SOFTWARE.
2043 + */
2044 +
2045 +#include "common.h"
2046 +#include <xen/evtchn.h>
2047 +#include <linux/kthread.h>
2048 +#include <linux/delay.h>
2049 +
2050 +static kmem_cache_t *blkif_cachep;
2051 +
2052 +blkif_t *blkif_alloc(domid_t domid)
2053 +{
2054 +       blkif_t *blkif;
2055 +
2056 +       blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
2057 +       if (!blkif)
2058 +               return ERR_PTR(-ENOMEM);
2059 +
2060 +       memset(blkif, 0, sizeof(*blkif));
2061 +       blkif->domid = domid;
2062 +       spin_lock_init(&blkif->blk_ring_lock);
2063 +       atomic_set(&blkif->refcnt, 1);
2064 +       init_waitqueue_head(&blkif->wq);
2065 +       blkif->st_print = jiffies;
2066 +       init_waitqueue_head(&blkif->waiting_to_free);
2067 +
2068 +       return blkif;
2069 +}
2070 +
2071 +static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
2072 +{
2073 +       struct gnttab_map_grant_ref op;
2074 +
2075 +       gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
2076 +                         GNTMAP_host_map, shared_page, blkif->domid);
2077 +
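+	/* GNTST_eagain denotes a transient failure: the grant cannot be
+	 * mapped right now, so retry with a delay rather than giving up. */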
2078 +	do {
2079 +		if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
2080 +			BUG();
2081 +		if (op.status == GNTST_eagain) msleep(100);
2082 +	} while (op.status == GNTST_eagain);
2083 +
2084 +       if (op.status) {
2085 +		DPRINTK("Grant table operation failure!\n");
2086 +               return op.status;
2087 +       }
2088 +
2089 +       blkif->shmem_ref = shared_page;
2090 +       blkif->shmem_handle = op.handle;
2091 +
2092 +       return 0;
2093 +}
2094 +
2095 +static void unmap_frontend_page(blkif_t *blkif)
2096 +{
2097 +       struct gnttab_unmap_grant_ref op;
2098 +
2099 +       gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
2100 +                           GNTMAP_host_map, blkif->shmem_handle);
2101 +
2102 +       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
2103 +               BUG();
2104 +}
2105 +
2106 +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
2107 +{
2108 +       int err;
2109 +
2110 +       /* Already connected through? */
2111 +       if (blkif->irq)
2112 +               return 0;
2113 +
2114 +	if ((blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL)
2115 +               return -ENOMEM;
2116 +
2117 +       err = map_frontend_page(blkif, shared_page);
2118 +       if (err) {
2119 +               free_vm_area(blkif->blk_ring_area);
2120 +               return err;
2121 +       }
2122 +
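+	/* Initialise the back ring using the layout matching the
+	 * frontend's ABI: native, or the fixed x86_32/x86_64 layouts for
+	 * cross-bitness guests. */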
2123 +       switch (blkif->blk_protocol) {
2124 +       case BLKIF_PROTOCOL_NATIVE:
2125 +       {
2126 +               blkif_sring_t *sring;
2127 +               sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
2128 +               BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
2129 +               break;
2130 +       }
2131 +       case BLKIF_PROTOCOL_X86_32:
2132 +       {
2133 +               blkif_x86_32_sring_t *sring_x86_32;
2134 +               sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr;
2135 +               BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
2136 +               break;
2137 +       }
2138 +       case BLKIF_PROTOCOL_X86_64:
2139 +       {
2140 +               blkif_x86_64_sring_t *sring_x86_64;
2141 +               sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr;
2142 +               BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
2143 +               break;
2144 +       }
2145 +       default:
2146 +               BUG();
2147 +       }
2148 +
2149 +       err = bind_interdomain_evtchn_to_irqhandler(
2150 +               blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif);
2151 +       if (err < 0)
2152 +       {
2153 +               unmap_frontend_page(blkif);
2154 +               free_vm_area(blkif->blk_ring_area);
2155 +               blkif->blk_rings.common.sring = NULL;
2156 +               return err;
2157 +       }
2158 +       blkif->irq = err;
2159 +
2160 +       return 0;
2161 +}
2162 +
2163 +void blkif_disconnect(blkif_t *blkif)
2164 +{
2165 +       if (blkif->xenblkd) {
2166 +               kthread_stop(blkif->xenblkd);
2167 +               blkif->xenblkd = NULL;
2168 +       }
2169 +
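+	/* Drop the initial reference from blkif_alloc(), wait for all
+	 * in-flight requests to release theirs, then re-take it so the
+	 * final blkif_free() still sees a count of one. */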
2170 +       atomic_dec(&blkif->refcnt);
2171 +       wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
2172 +       atomic_inc(&blkif->refcnt);
2173 +
2174 +       if (blkif->irq) {
2175 +               unbind_from_irqhandler(blkif->irq, blkif);
2176 +               blkif->irq = 0;
2177 +       }
2178 +
2179 +       if (blkif->blk_rings.common.sring) {
2180 +               unmap_frontend_page(blkif);
2181 +               free_vm_area(blkif->blk_ring_area);
2182 +               blkif->blk_rings.common.sring = NULL;
2183 +       }
2184 +}
2185 +
2186 +void blkif_free(blkif_t *blkif)
2187 +{
2188 +       if (!atomic_dec_and_test(&blkif->refcnt))
2189 +               BUG();
2190 +       kmem_cache_free(blkif_cachep, blkif);
2191 +}
2192 +
2193 +void __init blkif_interface_init(void)
2194 +{
2195 +       blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
2196 +                                        0, 0, NULL, NULL);
2197 +}
2198 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
2199 +++ sle11sp1-2010-03-29/drivers/xen/blkback/vbd.c       2010-03-22 12:00:53.000000000 +0100
2200 @@ -0,0 +1,161 @@
2201 +/******************************************************************************
2202 + * blkback/vbd.c
2203 + * 
2204 + * Routines for managing virtual block devices (VBDs).
2205 + * 
2206 + * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
2207 + * 
2208 + * This program is free software; you can redistribute it and/or
2209 + * modify it under the terms of the GNU General Public License version 2
2210 + * as published by the Free Software Foundation; or, when distributed
2211 + * separately from the Linux kernel or incorporated into other
2212 + * software packages, subject to the following license:
2213 + * 
2214 + * Permission is hereby granted, free of charge, to any person obtaining a copy
2215 + * of this source file (the "Software"), to deal in the Software without
2216 + * restriction, including without limitation the rights to use, copy, modify,
2217 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
2218 + * and to permit persons to whom the Software is furnished to do so, subject to
2219 + * the following conditions:
2220 + * 
2221 + * The above copyright notice and this permission notice shall be included in
2222 + * all copies or substantial portions of the Software.
2223 + * 
2224 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2225 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2226 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2227 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2228 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2229 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2230 + * IN THE SOFTWARE.
2231 + */
2232 +
2233 +#include "common.h"
2234 +
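+/* Size in 512-byte sectors: the partition's size when the VBD maps a
+ * partition, otherwise the whole disk's capacity. */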
2235 +#define vbd_sz(_v)   ((_v)->bdev->bd_part ?                            \
2236 +       (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
2237 +
2238 +unsigned long long vbd_size(struct vbd *vbd)
2239 +{
2240 +       return vbd_sz(vbd);
2241 +}
2242 +
2243 +unsigned int vbd_info(struct vbd *vbd)
2244 +{
2245 +	return vbd->type | (vbd->readonly ? VDISK_READONLY : 0);
2246 +}
2247 +
2248 +unsigned long vbd_secsize(struct vbd *vbd)
2249 +{
2250 +       return bdev_hardsect_size(vbd->bdev);
2251 +}
2252 +
2253 +int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
2254 +              unsigned minor, int readonly, int cdrom)
2255 +{
2256 +       struct vbd *vbd;
2257 +       struct block_device *bdev;
2258 +
2259 +       vbd = &blkif->vbd;
2260 +       vbd->handle   = handle; 
2261 +       vbd->readonly = readonly;
2262 +       vbd->type     = 0;
2263 +
2264 +       vbd->pdevice  = MKDEV(major, minor);
2265 +
2266 +       bdev = open_by_devnum(vbd->pdevice,
2267 +                             vbd->readonly ? FMODE_READ : FMODE_WRITE);
2268 +
2269 +       if (IS_ERR(bdev)) {
2270 +		DPRINTK("vbd_create: device %08x could not be opened.\n",
2271 +                       vbd->pdevice);
2272 +               return -ENOENT;
2273 +       }
2274 +
2275 +       vbd->bdev = bdev;
2276 +       vbd->size = vbd_size(vbd);
2277 +
2278 +       if (vbd->bdev->bd_disk == NULL) {
2279 +		DPRINTK("vbd_create: device %08x doesn't exist.\n",
2280 +                       vbd->pdevice);
2281 +               vbd_free(vbd);
2282 +               return -ENOENT;
2283 +       }
2284 +
2285 +       if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
2286 +               vbd->type |= VDISK_CDROM;
2287 +       if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
2288 +               vbd->type |= VDISK_REMOVABLE;
2289 +
2290 +       DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
2291 +               handle, blkif->domid);
2292 +       return 0;
2293 +}
2294 +
2295 +void vbd_free(struct vbd *vbd)
2296 +{
2297 +       if (vbd->bdev)
2298 +               blkdev_put(vbd->bdev);
2299 +       vbd->bdev = NULL;
2300 +}
2301 +
2302 +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
2303 +{
2304 +       struct vbd *vbd = &blkif->vbd;
2305 +       int rc = -EACCES;
2306 +
2307 +       if ((operation != READ) && vbd->readonly)
2308 +               goto out;
2309 +
2310 +       if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
2311 +               goto out;
2312 +
2313 +       req->dev  = vbd->pdevice;
2314 +       req->bdev = vbd->bdev;
2315 +       rc = 0;
2316 +
2317 + out:
2318 +       return rc;
2319 +}
2320 +
2321 +void vbd_resize(blkif_t *blkif)
2322 +{
2323 +       struct vbd *vbd = &blkif->vbd;
2324 +       struct xenbus_transaction xbt;
2325 +       int err;
2326 +       struct xenbus_device *dev = blkif->be->dev;
2327 +       unsigned long long new_size = vbd_size(vbd);
2328 +
2329 +       printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size);
2330 +       vbd->size = new_size;
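+	/* Publish the new size through a xenbus transaction, restarting
+	 * from "again" whenever it ends with -EAGAIN. */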
2331 +again:
2332 +       err = xenbus_transaction_start(&xbt);
2333 +       if (err) {
2334 +		printk(KERN_WARNING "Error starting transaction\n");
2335 +               return;
2336 +       }
2337 +       err = xenbus_printf(xbt, dev->nodename, "sectors", "%Lu",
2338 +                           vbd_size(vbd));
2339 +       if (err) {
2340 +		printk(KERN_WARNING "Error writing new size\n");
2341 +               goto abort;
2342 +       }
2343 +       /*
2344 +        * Write the current state; we will use this to synchronize
2345 +        * the front-end. If the current state is "connected" the
2346 +        * front-end will get the new size information online.
2347 +        */
2348 +       err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
2349 +       if (err) {
2350 +		printk(KERN_WARNING "Error writing the state\n");
2351 +               goto abort;
2352 +       }
2353 +
2354 +       err = xenbus_transaction_end(xbt, 0);
2355 +       if (err == -EAGAIN)
2356 +               goto again;
2357 +	if (err)
2358 +		printk(KERN_WARNING "Error ending transaction\n");
2359 +	return;
2360 +abort:	xenbus_transaction_end(xbt, 1);
2361 +}
2362 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
2363 +++ sle11sp1-2010-03-29/drivers/xen/blkback/xenbus.c    2010-03-22 12:00:53.000000000 +0100
2364 @@ -0,0 +1,551 @@
2365 +/*  Xenbus code for blkif backend
2366 +    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
2367 +    Copyright (C) 2005 XenSource Ltd
2368 +
2369 +    This program is free software; you can redistribute it and/or modify
2370 +    it under the terms of the GNU General Public License as published by
2371 +    the Free Software Foundation; either version 2 of the License, or
2372 +    (at your option) any later version.
2373 +
2374 +    This program is distributed in the hope that it will be useful,
2375 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
2376 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2377 +    GNU General Public License for more details.
2378 +
2379 +    You should have received a copy of the GNU General Public License
2380 +    along with this program; if not, write to the Free Software
2381 +    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
2382 +*/
2383 +
2384 +#include <stdarg.h>
2385 +#include <linux/module.h>
2386 +#include <linux/kthread.h>
2387 +#include "common.h"
2388 +
2389 +#undef DPRINTK
2390 +#define DPRINTK(fmt, args...)                          \
2391 +       pr_debug("blkback/xenbus (%s:%d) " fmt ".\n",   \
2392 +                __FUNCTION__, __LINE__, ##args)
2393 +
2394 +static DEFINE_RWLOCK(sysfs_read_lock);
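+/* Serialises sysfs show methods against blkback_remove() tearing down
+ * the backend_info they dereference. */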
2395 +
2396 +static void connect(struct backend_info *);
2397 +static int connect_ring(struct backend_info *);
2398 +static void backend_changed(struct xenbus_watch *, const char **,
2399 +                           unsigned int);
2400 +
2401 +static int blkback_name(blkif_t *blkif, char *buf)
2402 +{
2403 +       char *devpath, *devname;
2404 +       struct xenbus_device *dev = blkif->be->dev;
2405 +
2406 +       devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
2407 +       if (IS_ERR(devpath)) 
2408 +               return PTR_ERR(devpath);
2409 +       
2410 +       if ((devname = strstr(devpath, "/dev/")) != NULL)
2411 +               devname += strlen("/dev/");
2412 +       else
2413 +               devname  = devpath;
2414 +
2415 +       snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
2416 +       kfree(devpath);
2417 +       
2418 +       return 0;
2419 +}
2420 +
2421 +static void update_blkif_status(blkif_t *blkif)
2422 +{ 
2423 +       int err;
2424 +       char name[TASK_COMM_LEN];
2425 +
2426 +       /* Not ready to connect? */
2427 +       if (!blkif->irq || !blkif->vbd.bdev)
2428 +               return;
2429 +
2430 +       /* Already connected? */
2431 +       if (blkif->be->dev->state == XenbusStateConnected)
2432 +               return;
2433 +
2434 +       /* Attempt to connect: exit if we fail to. */
2435 +       connect(blkif->be);
2436 +       if (blkif->be->dev->state != XenbusStateConnected)
2437 +               return;
2438 +
2439 +       err = blkback_name(blkif, name);
2440 +       if (err) {
2441 +               xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
2442 +               return;
2443 +       }
2444 +
2445 +       err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
2446 +       if (err) {
2447 +               xenbus_dev_error(blkif->be->dev, err, "block flush");
2448 +               return;
2449 +       }
2450 +       invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
2451 +
2452 +       blkif->xenblkd = kthread_run(blkif_schedule, blkif, name);
2453 +       if (IS_ERR(blkif->xenblkd)) {
2454 +               err = PTR_ERR(blkif->xenblkd);
2455 +               blkif->xenblkd = NULL;
2456 +               xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
2457 +       }
2458 +}
2459 +
2460 +
2461 +/****************************************************************
2462 + *  sysfs interface for VBD I/O requests
2463 + */
2464 +
2465 +#define VBD_SHOW(name, format, args...)                                        \
2466 +       static ssize_t show_##name(struct device *_dev,                 \
2467 +                                  struct device_attribute *attr,       \
2468 +                                  char *buf)                           \
2469 +       {                                                               \
2470 +               ssize_t ret = -ENODEV;                                  \
2471 +               struct xenbus_device *dev;                              \
2472 +               struct backend_info *be;                                \
2473 +                                                                       \
2474 +               if (!get_device(_dev))                                  \
2475 +                       return ret;                                     \
2476 +               dev = to_xenbus_device(_dev);                           \
2477 +               read_lock(&sysfs_read_lock);                            \
2478 +               if ((be = dev->dev.driver_data) != NULL)                \
2479 +                       ret = sprintf(buf, format, ##args);             \
2480 +               read_unlock(&sysfs_read_lock);                          \
2481 +               put_device(_dev);                                       \
2482 +               return ret;                                             \
2483 +       }                                                               \
2484 +       static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
2485 +
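+/* Statistics counters exported per VBD under sysfs; each read takes
+ * sysfs_read_lock to guard against concurrent backend removal. */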
2486 +VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
2487 +VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
2488 +VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
2489 +VBD_SHOW(br_req,  "%d\n", be->blkif->st_br_req);
2490 +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
2491 +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
2492 +
2493 +static struct attribute *vbdstat_attrs[] = {
2494 +       &dev_attr_oo_req.attr,
2495 +       &dev_attr_rd_req.attr,
2496 +       &dev_attr_wr_req.attr,
2497 +       &dev_attr_br_req.attr,
2498 +       &dev_attr_rd_sect.attr,
2499 +       &dev_attr_wr_sect.attr,
2500 +       NULL
2501 +};
2502 +
2503 +static struct attribute_group vbdstat_group = {
2504 +       .name = "statistics",
2505 +       .attrs = vbdstat_attrs,
2506 +};
2507 +
2508 +VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
2509 +VBD_SHOW(mode, "%s\n", be->mode);
2510 +
2511 +int xenvbd_sysfs_addif(struct xenbus_device *dev)
2512 +{
2513 +       int error;
2514 +       
2515 +       error = device_create_file(&dev->dev, &dev_attr_physical_device);
2516 +       if (error)
2517 +               goto fail1;
2518 +
2519 +       error = device_create_file(&dev->dev, &dev_attr_mode);
2520 +       if (error)
2521 +               goto fail2;
2522 +
2523 +       error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group);
2524 +       if (error)
2525 +               goto fail3;
2526 +
2527 +       return 0;
2528 +
2529 +fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
2530 +fail2: device_remove_file(&dev->dev, &dev_attr_mode);
2531 +fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
2532 +       return error;
2533 +}
2534 +
2535 +void xenvbd_sysfs_delif(struct xenbus_device *dev)
2536 +{
2537 +       sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
2538 +       device_remove_file(&dev->dev, &dev_attr_mode);
2539 +       device_remove_file(&dev->dev, &dev_attr_physical_device);
2540 +}
2541 +
2542 +static int blkback_remove(struct xenbus_device *dev)
2543 +{
2544 +       struct backend_info *be = dev->dev.driver_data;
2545 +
2546 +       DPRINTK("");
2547 +
2548 +       write_lock(&sysfs_read_lock);
2549 +       if (be->major || be->minor)
2550 +               xenvbd_sysfs_delif(dev);
2551 +
2552 +       if (be->backend_watch.node) {
2553 +               unregister_xenbus_watch(&be->backend_watch);
2554 +               kfree(be->backend_watch.node);
2555 +               be->backend_watch.node = NULL;
2556 +       }
2557 +
2558 +       if (be->blkif) {
2559 +               blkif_disconnect(be->blkif);
2560 +               vbd_free(&be->blkif->vbd);
2561 +               blkif_free(be->blkif);
2562 +               be->blkif = NULL;
2563 +       }
2564 +
2565 +       kfree(be);
2566 +       dev->dev.driver_data = NULL;
2567 +       write_unlock(&sysfs_read_lock);
2568 +       return 0;
2569 +}
2570 +
2571 +int blkback_barrier(struct xenbus_transaction xbt,
2572 +                   struct backend_info *be, int state)
2573 +{
2574 +       struct xenbus_device *dev = be->dev;
2575 +       int err;
2576 +
2577 +       err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
2578 +                           "%d", state);
2579 +       if (err)
2580 +               xenbus_dev_fatal(dev, err, "writing feature-barrier");
2581 +
2582 +       return err;
2583 +}
2584 +
2585 +/**
2586 + * Entry point to this code when a new device is created.  Allocate the basic
2587 + * structures, and watch the store waiting for the hotplug scripts to tell us
2588 + * the device's physical major and minor numbers.  Switch to InitWait.
2589 + */
2590 +static int blkback_probe(struct xenbus_device *dev,
2591 +                        const struct xenbus_device_id *id)
2592 +{
2593 +       int err;
2594 +       struct backend_info *be = kzalloc(sizeof(struct backend_info),
2595 +                                         GFP_KERNEL);
2596 +       if (!be) {
2597 +               xenbus_dev_fatal(dev, -ENOMEM,
2598 +                                "allocating backend structure");
2599 +               return -ENOMEM;
2600 +       }
2601 +       be->dev = dev;
2602 +       dev->dev.driver_data = be;
2603 +
2604 +       be->blkif = blkif_alloc(dev->otherend_id);
2605 +       if (IS_ERR(be->blkif)) {
2606 +               err = PTR_ERR(be->blkif);
2607 +               be->blkif = NULL;
2608 +               xenbus_dev_fatal(dev, err, "creating block interface");
2609 +               goto fail;
2610 +       }
2611 +
2612 +       /* setup back pointer */
2613 +       be->blkif->be = be;
2614 +
2615 +       err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
2616 +                                &be->backend_watch, backend_changed);
2617 +       if (err)
2618 +               goto fail;
2619 +
2620 +       err = xenbus_switch_state(dev, XenbusStateInitWait);
2621 +       if (err)
2622 +               goto fail;
2623 +
2624 +       return 0;
2625 +
2626 +fail:
2627 +       DPRINTK("failed");
2628 +       blkback_remove(dev);
2629 +       return err;
2630 +}
2631 +
2632 +
2633 +/**
2634 + * Callback received when the hotplug scripts have placed the physical-device
2635 + * node.  Read it and the mode node, and create a vbd.  If the frontend is
2636 + * ready, connect.
2637 + */
2638 +static void backend_changed(struct xenbus_watch *watch,
2639 +                           const char **vec, unsigned int len)
2640 +{
2641 +       int err;
2642 +       unsigned major;
2643 +       unsigned minor;
2644 +       struct backend_info *be
2645 +               = container_of(watch, struct backend_info, backend_watch);
2646 +       struct xenbus_device *dev = be->dev;
2647 +       int cdrom = 0;
2648 +       char *device_type;
2649 +
2650 +       DPRINTK("");
2651 +
2652 +       err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
2653 +                          &major, &minor);
2654 +       if (XENBUS_EXIST_ERR(err)) {
2655 +               /* Since this watch will fire once immediately after it is
2656 +                  registered, we expect this.  Ignore it, and wait for the
2657 +                  hotplug scripts. */
2658 +               return;
2659 +       }
2660 +       if (err != 2) {
2661 +               xenbus_dev_fatal(dev, err, "reading physical-device");
2662 +               return;
2663 +       }
2664 +
2665 +       if ((be->major || be->minor) &&
2666 +           ((be->major != major) || (be->minor != minor))) {
2667 +               printk(KERN_WARNING
2668 +                      "blkback: changing physical device (from %x:%x to "
2669 +                      "%x:%x) not supported.\n", be->major, be->minor,
2670 +                      major, minor);
2671 +               return;
2672 +       }
2673 +
2674 +       be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
2675 +       if (IS_ERR(be->mode)) {
2676 +               err = PTR_ERR(be->mode);
2677 +               be->mode = NULL;