1 Subject: xen3 common
2 From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 1140:821a5b2a10c8)
3 Patch-mainline: n/a
4 Acked-by: jbeulich@novell.com
5
6 List of files that don't require modification anymore (and hence got
7 removed from this patch), for reference and in case upstream wants to
8 take the forward porting patches:
9 2.6.19/include/linux/skbuff.h
10 2.6.19/net/core/dev.c
11 2.6.19/net/core/skbuff.c
12 2.6.19/net/ipv4/netfilter/nf_nat_proto_tcp.c
13 2.6.19/net/ipv4/netfilter/nf_nat_proto_udp.c
14 2.6.19/net/ipv4/xfrm4_output.c
15 2.6.22/include/linux/sched.h
16 2.6.22/kernel/softlockup.c
17 2.6.22/kernel/timer.c
18 2.6.25/mm/highmem.c
19 2.6.30/include/linux/pci_regs.h
20 2.6.35/kernel/time.c
21
22 --- head-2012-01-06.orig/drivers/Makefile       2012-01-06 10:21:23.000000000 +0100
23 +++ head-2012-01-06/drivers/Makefile    2011-11-16 17:01:23.000000000 +0100
24 @@ -47,6 +47,7 @@ obj-$(CONFIG_PARPORT)         += parport/
25  obj-y                          += base/ block/ misc/ mfd/ nfc/
26  obj-$(CONFIG_NUBUS)            += nubus/
27  obj-y                          += macintosh/
28 +obj-$(CONFIG_XEN)              += xen/
29  obj-$(CONFIG_IDE)              += ide/
30  obj-$(CONFIG_SCSI)             += scsi/
31  obj-$(CONFIG_ATA)              += ata/
32 --- head-2012-01-06.orig/drivers/acpi/Makefile  2012-01-06 10:21:23.000000000 +0100
33 +++ head-2012-01-06/drivers/acpi/Makefile       2011-04-13 11:25:29.000000000 +0200
34 @@ -67,6 +67,9 @@ obj-$(CONFIG_ACPI_CUSTOM_METHOD)+= custo
35  processor-y                    := processor_driver.o processor_throttling.o
36  processor-y                    += processor_idle.o processor_thermal.o
37  processor-$(CONFIG_CPU_FREQ)   += processor_perflib.o
38 +ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
39 +processor-objs += processor_perflib.o processor_extcntl.o
40 +endif
41  
42  obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
43  obj-$(CONFIG_ACPI_IPMI)                += acpi_ipmi.o
44 --- head-2012-01-06.orig/drivers/acpi/acpica/hwsleep.c  2012-01-06 10:21:23.000000000 +0100
45 +++ head-2012-01-06/drivers/acpi/acpica/hwsleep.c       2011-11-16 17:01:18.000000000 +0100
46 @@ -237,7 +237,11 @@ acpi_status asmlinkage acpi_enter_sleep_
47         u32 pm1b_control;
48         struct acpi_bit_register_info *sleep_type_reg_info;
49         struct acpi_bit_register_info *sleep_enable_reg_info;
50 +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
51         u32 in_value;
52 +#else
53 +       int err;
54 +#endif
55         struct acpi_object_list arg_list;
56         union acpi_object arg;
57         acpi_status status;
58 @@ -348,6 +352,7 @@ acpi_status asmlinkage acpi_enter_sleep_
59  
60         /* Write #2: Write both SLP_TYP + SLP_EN */
61  
62 +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
63         status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control);
64         if (ACPI_FAILURE(status)) {
65                 return_ACPI_STATUS(status);
66 @@ -387,6 +392,16 @@ acpi_status asmlinkage acpi_enter_sleep_
67                 /* Spin until we wake */
68  
69         } while (!in_value);
70 +#else
71 +       /* PV ACPI just needs to check the hypercall return value */
72 +       err = acpi_notify_hypervisor_state(sleep_state,
73 +                       pm1a_control, pm1b_control);
74 +       if (err) {
75 +               ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
76 +                                 "Hypervisor failure [%d]\n", err));
77 +               return_ACPI_STATUS(AE_ERROR);
78 +       }
79 +#endif
80  
81         return_ACPI_STATUS(AE_OK);
82  }
83 --- head-2012-01-06.orig/drivers/acpi/processor_driver.c        2012-01-06 10:21:23.000000000 +0100
84 +++ head-2012-01-06/drivers/acpi/processor_driver.c     2011-12-07 11:54:36.000000000 +0100
85 @@ -325,7 +325,8 @@ static int acpi_processor_get_info(struc
86          */
87         if (pr->id == -1) {
88                 if (ACPI_FAILURE
89 -                   (acpi_processor_hotadd_init(pr->handle, &pr->id))) {
90 +                   (acpi_processor_hotadd_init(pr->handle, &pr->id)) &&
91 +                   !processor_cntl_external()) {
92                         return -ENODEV;
93                 }
94         }
95 @@ -376,7 +377,14 @@ static int acpi_processor_get_info(struc
96         return 0;
97  }
98  
99 +#ifndef CONFIG_XEN
100  static DEFINE_PER_CPU(void *, processor_device_array);
101 +#else
102 +#include <linux/mutex.h>
103 +#include <linux/radix-tree.h>
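+/*
+ * With external (VMM) processor control, pr->id may be -1, so the
+ * per-CPU device array cannot be used; instead, devices are tracked
+ * in a radix tree keyed by ACPI id, protected by a mutex.
+ */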
104 +static DEFINE_MUTEX(processor_device_mutex);
105 +static RADIX_TREE(processor_device_tree, GFP_KERNEL);
106 +#endif
107  
108  static void acpi_processor_notify(struct acpi_device *device, u32 event)
109  {
110 @@ -462,8 +470,11 @@ static int __cpuinit acpi_processor_add(
111         strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS);
112         device->driver_data = pr;
113  
114 +       processor_extcntl_init();
115 +
116         result = acpi_processor_get_info(device);
117 -       if (result) {
118 +       if (result ||
119 +           ((pr->id == -1) && !processor_cntl_external())) {
120                 /* Processor is physically not present */
121                 return 0;
122         }
123 @@ -473,22 +484,43 @@ static int __cpuinit acpi_processor_add(
124                 return 0;
125  #endif
126  
127 -       BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0));
128 +       BUG_ON(!processor_cntl_external() &&
129 +              ((pr->id >= nr_cpu_ids) || (pr->id < 0)));
130  
131         /*
132          * Buggy BIOS check
133          * ACPI id of processors can be reported wrongly by the BIOS.
134          * Don't trust it blindly
135          */
136 +#ifndef CONFIG_XEN
137         if (per_cpu(processor_device_array, pr->id) != NULL &&
138             per_cpu(processor_device_array, pr->id) != device) {
139 +#else
140 +       mutex_lock(&processor_device_mutex);
141 +       result = radix_tree_insert(&processor_device_tree,
142 +                                  pr->acpi_id, device);
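+       /*
+        * radix_tree_insert() returns 0 on success or -EEXIST if an entry
+        * already exists for this ACPI id.  A duplicate referring to the
+        * same device falls through into "case 0" and shares its unlock-
+        * and-continue path; a duplicate for a different device drops the
+        * lock and is reported as a wrong BIOS ACPI id below.  Any other
+        * error bails out via err_unlock_free_cpumask.
+        */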
143 +       switch (result) {
144 +       default:
145 +               goto err_unlock_free_cpumask;
146 +       case -EEXIST:
147 +               if (radix_tree_lookup(&processor_device_tree,
148 +                                     pr->acpi_id) == device) {
149 +       case 0:
150 +                       mutex_unlock(&processor_device_mutex);
151 +                       break;
152 +               }
153 +               mutex_unlock(&processor_device_mutex);
154 +#endif
155                 printk(KERN_WARNING "BIOS reported wrong ACPI id "
156                         "for the processor\n");
157                 result = -ENODEV;
158                 goto err_free_cpumask;
159         }
160 +#ifndef CONFIG_XEN
161         per_cpu(processor_device_array, pr->id) = device;
162 -
163 +#else
164 +       if (pr->id != -1)
165 +#endif
166         per_cpu(processors, pr->id) = pr;
167  
168         sysdev = get_cpu_sysdev(pr->id);
169 @@ -497,15 +529,27 @@ static int __cpuinit acpi_processor_add(
170                 goto err_free_cpumask;
171         }
172  
173 -#ifdef CONFIG_CPU_FREQ
174 +#if defined(CONFIG_CPU_FREQ) || defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
175         acpi_processor_ppc_has_changed(pr, 0);
176  #endif
177 -       acpi_processor_get_throttling_info(pr);
178 -       acpi_processor_get_limit_info(pr);
179 +       /*
180 +        * pr->id may be -1 when processor_cntl_external() is enabled.
181 +        * The throttling and thermal modules don't support this case.
182 +        * For now Tx only works when the dom0 vcpu count equals the pcpu
183 +        * count, as we hand that control to dom0.
184 +        */
185 +       if (pr->id != -1) {
186 +               acpi_processor_get_throttling_info(pr);
187 +               acpi_processor_get_limit_info(pr);
188 +       }
189  
190         if (!cpuidle_get_driver() || cpuidle_get_driver() == &acpi_idle_driver)
191                 acpi_processor_power_init(pr, device);
192  
193 +       result = processor_extcntl_prepare(pr);
194 +       if (result)
195 +               goto end;
196 +
197         pr->cdev = thermal_cooling_device_register("Processor", device,
198                                                 &processor_cooling_ops);
199         if (IS_ERR(pr->cdev)) {
200 @@ -540,6 +584,14 @@ err_thermal_unregister:
201  err_power_exit:
202         acpi_processor_power_exit(pr, device);
203  err_free_cpumask:
204 +#ifdef CONFIG_XEN
205 +       mutex_lock(&processor_device_mutex);
206 +       if (radix_tree_lookup(&processor_device_tree,
207 +                             pr->acpi_id) == device)
208 +               radix_tree_delete(&processor_device_tree, pr->acpi_id);
209 +err_unlock_free_cpumask:
210 +       mutex_unlock(&processor_device_mutex);
211 +#endif
212         free_cpumask_var(pr->throttling.shared_cpu_map);
213  
214         return result;
215 @@ -555,7 +607,7 @@ static int acpi_processor_remove(struct 
216  
217         pr = acpi_driver_data(device);
218  
219 -       if (pr->id >= nr_cpu_ids)
220 +       if (!processor_cntl_external() && pr->id >= nr_cpu_ids)
221                 goto free;
222  
223         if (type == ACPI_BUS_REMOVAL_EJECT) {
224 @@ -574,8 +626,16 @@ static int acpi_processor_remove(struct 
225                 pr->cdev = NULL;
226         }
227  
228 +#ifndef CONFIG_XEN
229         per_cpu(processors, pr->id) = NULL;
230         per_cpu(processor_device_array, pr->id) = NULL;
231 +#else
232 +       if (pr->id != -1)
233 +               per_cpu(processors, pr->id) = NULL;
234 +       mutex_lock(&processor_device_mutex);
235 +       radix_tree_delete(&processor_device_tree, pr->acpi_id);
236 +       mutex_unlock(&processor_device_mutex);
237 +#endif
238  
239  free:
240         free_cpumask_var(pr->throttling.shared_cpu_map);
241 @@ -631,6 +691,10 @@ int acpi_processor_device_add(acpi_handl
242                 return -ENODEV;
243         }
244  
245 +       if (processor_cntl_external() && acpi_driver_data(*device))
246 +               processor_notify_external(acpi_driver_data(*device),
247 +                       PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD);
248 +
249         return 0;
250  }
251  
252 @@ -660,6 +724,10 @@ static void acpi_processor_hotplug_notif
253                                             "Unable to add the device\n");
254                         break;
255                 }
256 +               pr = acpi_driver_data(device);
257 +               if (processor_cntl_external() && pr)
258 +                       processor_notify_external(pr,
259 +                                       PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD);
260                 break;
261         case ACPI_NOTIFY_EJECT_REQUEST:
262                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
263 @@ -676,6 +744,9 @@ static void acpi_processor_hotplug_notif
264                                     "Driver data is NULL, dropping EJECT\n");
265                         return;
266                 }
267 +               if (processor_cntl_external())
268 +                       processor_notify_external(pr, PROCESSOR_HOTPLUG,
269 +                                               HOTPLUG_TYPE_REMOVE);
270                 break;
271         default:
272                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
273 @@ -740,6 +811,11 @@ static acpi_status acpi_processor_hotadd
274  
275  static int acpi_processor_handle_eject(struct acpi_processor *pr)
276  {
277 +#ifdef CONFIG_XEN
278 +       if (pr->id == -1)
279 +               return (0);
280 +#endif
281 +
282         if (cpu_online(pr->id))
283                 cpu_down(pr->id);
284  
285 @@ -829,6 +905,30 @@ static void __exit acpi_processor_exit(v
286  
287         cpuidle_unregister_driver(&acpi_idle_driver);
288  
289 +#ifdef CONFIG_XEN
290 +       {
291 +               struct acpi_device *dev;
292 +               unsigned int idx = 0;
293 +
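+               /*
+                * Drain the ACPI-id-keyed device tree built up by
+                * acpi_processor_add(), one entry at a time, resuming
+                * the gang lookup from the last id seen.
+                */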
294 +               while (radix_tree_gang_lookup(&processor_device_tree,
295 +                                             (void **)&dev, idx, 1)) {
296 +                       struct acpi_processor *pr = acpi_driver_data(dev);
297 +
298 +                       /* prevent live lock */
299 +                       if (pr->acpi_id < idx) {
300 +                               printk(KERN_WARNING PREFIX "ID %u unexpected"
301 +                                      " (less than %u); leaking memory\n",
302 +                                      pr->acpi_id, idx);
303 +                               break;
304 +                       }
305 +                       idx = pr->acpi_id;
306 +                       radix_tree_delete(&processor_device_tree, idx);
307 +                       if (!++idx)
308 +                               break;
309 +               }
310 +       }
311 +#endif
312 +
313         return;
314  }
315  
316 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
317 +++ head-2012-01-06/drivers/acpi/processor_extcntl.c    2011-01-31 14:53:38.000000000 +0100
318 @@ -0,0 +1,241 @@
319 +/*
320 + * processor_extcntl.c - channel to external control logic
321 + *
322 + *  Copyright (C) 2008, Intel corporation
323 + *
324 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
325 + *
326 + *  This program is free software; you can redistribute it and/or modify
327 + *  it under the terms of the GNU General Public License as published by
328 + *  the Free Software Foundation; either version 2 of the License, or (at
329 + *  your option) any later version.
330 + *
331 + *  This program is distributed in the hope that it will be useful, but
332 + *  WITHOUT ANY WARRANTY; without even the implied warranty of
333 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
334 + *  General Public License for more details.
335 + *
336 + *  You should have received a copy of the GNU General Public License along
337 + *  with this program; if not, write to the Free Software Foundation, Inc.,
338 + *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
339 + *
340 + */
341 +
342 +#include <linux/kernel.h>
343 +#include <linux/init.h>
344 +#include <linux/types.h>
345 +#include <linux/acpi.h>
346 +#include <linux/pm.h>
347 +#include <linux/cpu.h>
348 +
349 +#include <acpi/processor.h>
350 +
351 +#define ACPI_PROCESSOR_COMPONENT        0x01000000
352 +#define ACPI_PROCESSOR_CLASS            "processor"
353 +#define ACPI_PROCESSOR_DRIVER_NAME      "ACPI Processor Driver"
354 +#define _COMPONENT              ACPI_PROCESSOR_COMPONENT
355 +ACPI_MODULE_NAME("acpi_processor")
356 +
357 +static int processor_extcntl_parse_csd(struct acpi_processor *pr);
358 +static int processor_extcntl_get_performance(struct acpi_processor *pr);
359 +/*
360 + * External processor control logic may register with its own set of
361 + * ops to get ACPI-related notifications. A VMM is one example.
362 + */
363 +const struct processor_extcntl_ops *processor_extcntl_ops;
364 +EXPORT_SYMBOL(processor_extcntl_ops);
365 +
366 +static int processor_notify_smm(void)
367 +{
368 +       acpi_status status;
369 +       static int is_done = 0;
370 +
371 +       /* only need to notify the BIOS successfully once */
372 +       /* avoid double notification, which may lead to unexpected results */
373 +       if (is_done)
374 +               return 0;
375 +
376 +       /* Can't write pstate_cnt to smi_cmd if either value is zero */
377 +       if ((!acpi_fadt.smi_cmd) || (!acpi_fadt.pstate_cnt)) {
378 +               ACPI_DEBUG_PRINT((ACPI_DB_INFO,"No SMI port or pstate_cnt\n"));
379 +               return 0;
380 +       }
381 +
382 +       ACPI_DEBUG_PRINT((ACPI_DB_INFO,
383 +               "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n",
384 +               acpi_fadt.pstate_cnt, acpi_fadt.smi_cmd));
385 +
386 +       /* FADT v1 doesn't support pstate_cnt, but many BIOS vendors use
387 +        * it anyway, so we need to support it... */
388 +       if (acpi_fadt_is_v1) {
389 +               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
390 +                       "Using v1.0 FADT reserved value for pstate_cnt\n"));
391 +       }
392 +
393 +       status = acpi_os_write_port(acpi_fadt.smi_cmd,
394 +                                   (u32) acpi_fadt.pstate_cnt, 8);
395 +       if (ACPI_FAILURE(status))
396 +               return status;
397 +
398 +       is_done = 1;
399 +
400 +       return 0;
401 +}
402 +
403 +int processor_notify_external(struct acpi_processor *pr, int event, int type)
404 +{
405 +       int ret = -EINVAL;
406 +
407 +       if (!processor_cntl_external())
408 +               return -EINVAL;
409 +
410 +       switch (event) {
411 +       case PROCESSOR_PM_INIT:
412 +       case PROCESSOR_PM_CHANGE:
413 +               if ((type >= PM_TYPE_MAX) ||
414 +                       !processor_extcntl_ops->pm_ops[type])
415 +                       break;
416 +
417 +               ret = processor_extcntl_ops->pm_ops[type](pr, event);
418 +               break;
419 +       case PROCESSOR_HOTPLUG:
420 +               if (processor_extcntl_ops->hotplug)
421 +                       ret = processor_extcntl_ops->hotplug(pr, type);
422 +               break;
423 +       default:
424 +               printk(KERN_ERR "Unsupported processor event %d.\n", event);
425 +               break;
426 +       }
427 +
428 +       return ret;
429 +}
430 +
431 +/*
432 + * External control logic can decide to take over all or part of the
433 + * physical processor control. Take a VMM for example: physical
434 + * processors are owned by the VMM, so existence information such as
435 + * hotplug events must always be forwarded to it. The same holds for
436 + * processor idle states, which are also necessarily controlled by the
437 + * VMM. For other control bits like performance/throttle states, the
438 + * VMM may choose whether or not to take control, per its own policy.
439 + */
440 +void processor_extcntl_init(void)
441 +{
442 +       if (!processor_extcntl_ops)
443 +               arch_acpi_processor_init_extcntl(&processor_extcntl_ops);
444 +}
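+/*
+ * Illustrative sketch only: an external control backend (e.g. a
+ * hypervisor port) is expected to supply a processor_extcntl_ops
+ * instance via arch_acpi_processor_init_extcntl(), roughly along
+ * these lines (callback names hypothetical):
+ *
+ *     static const struct processor_extcntl_ops xen_extcntl_ops = {
+ *             .hotplug              = xen_pcpu_hotplug,
+ *             .pm_ops[PM_TYPE_PERF] = xen_px_notifier,
+ *             .pm_ops[PM_TYPE_IDLE] = xen_cx_notifier,
+ *     };
+ */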
445 +
446 +/*
447 + * This is called from ACPI processor init and holds some housekeeping
448 + * work needed to satisfy the external control model. For example, the
449 + * dependency parsing stubs for idle and performance states live here;
450 + * that information may no longer be available once control logic such
451 + * as the cpufreq driver is split out of dom0.
452 + */
453 +int processor_extcntl_prepare(struct acpi_processor *pr)
454 +{
455 +       /* parse cstate dependency information */
456 +       if (processor_pm_external())
457 +               processor_extcntl_parse_csd(pr);
458 +
459 +       /* Initialize performance states */
460 +       if (processor_pmperf_external())
461 +               processor_extcntl_get_performance(pr);
462 +
463 +       return 0;
464 +}
465 +
466 +/*
467 + * Currently no _CSD is implemented, which is why the existing ACPI code
468 + * doesn't parse _CSD at all. But to keep the interface to the external
469 + * control logic complete, we put a placeholder here for future
470 + * compatibility.
471 + */
472 +static int processor_extcntl_parse_csd(struct acpi_processor *pr)
473 +{
474 +       int i;
475 +
476 +       for (i = 0; i < pr->power.count; i++) {
477 +               if (!pr->power.states[i].valid)
478 +                       continue;
479 +
480 +               /* No dependency by default */
481 +               pr->power.states[i].domain_info = NULL;
482 +               pr->power.states[i].csd_count = 0;
483 +       }
484 +
485 +       return 0;
486 +}
487 +
488 +/*
489 + * The existing ACPI module does parse performance states at some point,
490 + * namely when the acpi-cpufreq driver is loaded; however, we'd like to
491 + * disable that driver to avoid conflicts with the external control
492 + * logic. So we have to collect the raw performance information here,
493 + * when the ACPI processor object is found and started.
494 + */
495 +static int processor_extcntl_get_performance(struct acpi_processor *pr)
496 +{
497 +       int ret;
498 +       struct acpi_processor_performance *perf;
499 +       struct acpi_psd_package *pdomain;
500 +
501 +       if (pr->performance)
502 +               return -EBUSY;
503 +
504 +       perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL);
505 +       if (!perf)
506 +               return -ENOMEM;
507 +
508 +       pr->performance = perf;
509 +       /* Get basic performance state information */
510 +       ret = acpi_processor_get_performance_info(pr);
511 +       if (ret < 0)
512 +               goto err_out;
513 +
514 +       /*
515 +        * Here we need to retrieve the performance dependency information
516 +        * from the _PSD object. The existing interface is not used because
517 +        * it sticks to the Linux cpu id to construct a bitmap, whereas we
518 +        * want to decouple ACPI processor objects from the Linux cpu id
519 +        * logic. For example, even when Linux is configured as UP, we still
520 +        * want to parse all ACPI processor objects and hand them to the
521 +        * external logic. In that case, it's preferable to use the ACPI ID
522 +        * instead.
523 +        */
524 +       pdomain = &pr->performance->domain_info;
525 +       pdomain->num_processors = 0;
526 +       ret = acpi_processor_get_psd(pr);
527 +       if (ret < 0) {
528 +               /*
529 +                * _PSD is optional - assume no coordination if absent (or
530 +                * broken), matching native kernels' behavior.
531 +                */
532 +               pdomain->num_entries = ACPI_PSD_REV0_ENTRIES;
533 +               pdomain->revision = ACPI_PSD_REV0_REVISION;
534 +               pdomain->domain = pr->acpi_id;
535 +               pdomain->coord_type = DOMAIN_COORD_TYPE_SW_ALL;
536 +               pdomain->num_processors = 1;
537 +       }
538 +
539 +       /* Some sanity checks */
540 +       if ((pdomain->revision != ACPI_PSD_REV0_REVISION) ||
541 +           (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) ||
542 +           ((pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL) &&
543 +            (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY) &&
544 +            (pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL))) {
545 +               ret = -EINVAL;
546 +               goto err_out;
547 +       }
548 +
549 +       /* Last step is to notify BIOS that external logic exists */
550 +       processor_notify_smm();
551 +
552 +       processor_notify_external(pr, PROCESSOR_PM_INIT, PM_TYPE_PERF);
553 +
554 +       return 0;
555 +err_out:
556 +       pr->performance = NULL;
557 +       kfree(perf);
558 +       return ret;
559 +}
560 --- head-2012-01-06.orig/drivers/acpi/processor_idle.c  2012-01-06 10:21:23.000000000 +0100
561 +++ head-2012-01-06/drivers/acpi/processor_idle.c       2011-11-16 17:01:10.000000000 +0100
562 @@ -449,7 +449,8 @@ static int acpi_processor_get_power_info
563                                  */
564                                 cx.entry_method = ACPI_CSTATE_HALT;
565                                 snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
566 -                       } else {
567 +                       /* This doesn't apply to the external control case */
568 +                       } else if (!processor_pm_external()) {
569                                 continue;
570                         }
571                         if (cx.type == ACPI_STATE_C1 &&
572 @@ -488,6 +489,12 @@ static int acpi_processor_get_power_info
573  
574                 cx.power = obj->integer.value;
575  
576 +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
577 +               /* cache control methods to notify external logic */
578 +               if (processor_pm_external())
579 +                       memcpy(&cx.reg, reg, sizeof(*reg));
580 +#endif
581 +
582                 current_count++;
583                 memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));
584  
585 @@ -509,7 +516,7 @@ static int acpi_processor_get_power_info
586                           current_count));
587  
588         /* Validate number of power states discovered */
589 -       if (current_count < 2)
590 +       if (current_count < (processor_pm_external() ? 1 : 2))
591                 status = -EFAULT;
592  
593        end:
594 @@ -1259,6 +1266,11 @@ int __cpuinit acpi_processor_power_init(
595                 }
596                 acpi_processor_registered++;
597         }
598 +
599 +       if (processor_pm_external())
600 +               processor_notify_external(pr,
601 +                       PROCESSOR_PM_INIT, PM_TYPE_IDLE);
602 +
603         return 0;
604  }
605  
606 --- head-2012-01-06.orig/drivers/acpi/processor_perflib.c       2012-01-06 10:21:23.000000000 +0100
607 +++ head-2012-01-06/drivers/acpi/processor_perflib.c    2011-06-30 15:36:54.000000000 +0200
608 @@ -75,6 +75,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the fre
609  
610  static int acpi_processor_ppc_status;
611  
612 +#ifdef CONFIG_CPU_FREQ
613  static int acpi_processor_ppc_notifier(struct notifier_block *nb,
614                                        unsigned long event, void *data)
615  {
616 @@ -117,6 +118,7 @@ static int acpi_processor_ppc_notifier(s
617  static struct notifier_block acpi_ppc_notifier_block = {
618         .notifier_call = acpi_processor_ppc_notifier,
619  };
620 +#endif /* CONFIG_CPU_FREQ */
621  
622  static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
623  {
624 @@ -205,7 +207,12 @@ int acpi_processor_ppc_has_changed(struc
625         if (ret < 0)
626                 return (ret);
627         else
628 +#ifdef CONFIG_CPU_FREQ
629                 return cpufreq_update_policy(pr->id);
630 +#elif defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
631 +               return processor_notify_external(pr,
632 +                               PROCESSOR_PM_CHANGE, PM_TYPE_PERF);
633 +#endif
634  }
635  
636  int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
637 @@ -221,6 +228,7 @@ int acpi_processor_get_bios_limit(int cp
638  }
639  EXPORT_SYMBOL(acpi_processor_get_bios_limit);
640  
641 +#ifdef CONFIG_CPU_FREQ
642  void acpi_processor_ppc_init(void)
643  {
644         if (!cpufreq_register_notifier
645 @@ -239,6 +247,7 @@ void acpi_processor_ppc_exit(void)
646  
647         acpi_processor_ppc_status &= ~PPC_REGISTERED;
648  }
649 +#endif /* CONFIG_CPU_FREQ */
650  
651  static int acpi_processor_get_performance_control(struct acpi_processor *pr)
652  {
653 @@ -386,7 +395,10 @@ static int acpi_processor_get_performanc
654         return result;
655  }
656  
657 -static int acpi_processor_get_performance_info(struct acpi_processor *pr)
658 +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
659 +static
660 +#endif
661 +int acpi_processor_get_performance_info(struct acpi_processor *pr)
662  {
663         int result = 0;
664         acpi_status status = AE_OK;
665 @@ -431,6 +443,7 @@ static int acpi_processor_get_performanc
666         return result;
667  }
668  
669 +#ifdef CONFIG_CPU_FREQ
670  int acpi_processor_notify_smm(struct module *calling_module)
671  {
672         acpi_status status;
673 @@ -491,8 +504,12 @@ int acpi_processor_notify_smm(struct mod
674  }
675  
676  EXPORT_SYMBOL(acpi_processor_notify_smm);
677 +#endif /* CONFIG_CPU_FREQ */
678  
679 -static int acpi_processor_get_psd(struct acpi_processor        *pr)
680 +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
681 +static
682 +#endif
683 +int acpi_processor_get_psd(struct acpi_processor *pr)
684  {
685         int result = 0;
686         acpi_status status = AE_OK;
687 --- head-2012-01-06.orig/drivers/acpi/sleep.c   2012-01-06 10:21:23.000000000 +0100
688 +++ head-2012-01-06/drivers/acpi/sleep.c        2011-04-13 11:25:29.000000000 +0200
689 @@ -61,6 +61,7 @@ static struct notifier_block tts_notifie
690  static int acpi_sleep_prepare(u32 acpi_state)
691  {
692  #ifdef CONFIG_ACPI_SLEEP
693 +#ifndef CONFIG_ACPI_PV_SLEEP
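+       /*
+        * With CONFIG_ACPI_PV_SLEEP the hypervisor performs the actual
+        * suspend, so no real-mode wakeup address needs to be set up and
+        * this check is skipped.
+        */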
694         /* do we have a wakeup address for S2 and S3? */
695         if (acpi_state == ACPI_STATE_S3) {
696                 if (!acpi_wakeup_address) {
697 @@ -70,6 +71,7 @@ static int acpi_sleep_prepare(u32 acpi_s
698                                 (acpi_physical_address)acpi_wakeup_address);
699  
700         }
701 +#endif
702         ACPI_FLUSH_CPU_CACHE();
703  #endif
704         printk(KERN_INFO PREFIX "Preparing to enter system sleep state S%d\n",
705 --- head-2012-01-06.orig/drivers/block/floppy.c 2012-01-06 10:21:23.000000000 +0100
706 +++ head-2012-01-06/drivers/block/floppy.c      2010-03-25 14:39:44.000000000 +0100
707 @@ -146,7 +146,9 @@
708  
709  #undef  FLOPPY_SILENT_DCL_CLEAR
710  
711 +#ifndef CONFIG_XEN
712  #define REALLY_SLOW_IO
713 +#endif
714  
715  #define DEBUGT 2
716  
717 --- head-2012-01-06.orig/drivers/char/agp/intel-gtt.c   2012-01-06 10:21:23.000000000 +0100
718 +++ head-2012-01-06/drivers/char/agp/intel-gtt.c        2011-04-13 11:25:29.000000000 +0200
719 @@ -147,8 +147,19 @@ static struct page *i8xx_alloc_pages(voi
720         if (page == NULL)
721                 return NULL;
722  
723 +#ifdef CONFIG_XEN
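+       /*
+        * Under Xen the order-2 allocation must also be machine-contiguous
+        * and addressable below 4GB (hence the 32 address bits), so the
+        * pages are exchanged for a contiguous machine region.
+        */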
724 +       if (xen_create_contiguous_region((unsigned long)page_address(page), 2, 32)) {
725 +               __free_pages(page, 2);
726 +               return NULL;
727 +       }
728 +#endif
729 +
730         if (set_pages_uc(page, 4) < 0) {
731                 set_pages_wb(page, 4);
732 +#ifdef CONFIG_XEN
733 +               xen_destroy_contiguous_region((unsigned long)page_address(page),
734 +                                             2);
735 +#endif
736                 __free_pages(page, 2);
737                 return NULL;
738         }
739 @@ -163,6 +174,9 @@ static void i8xx_destroy_pages(struct pa
740                 return;
741  
742         set_pages_wb(page, 4);
743 +#ifdef CONFIG_XEN
744 +       xen_destroy_contiguous_region((unsigned long)page_address(page), 2);
745 +#endif
746         put_page(page);
747         __free_pages(page, 2);
748         atomic_dec(&agp_bridge->current_memory_agp);
749 --- head-2012-01-06.orig/drivers/char/mem.c     2012-01-06 10:21:23.000000000 +0100
750 +++ head-2012-01-06/drivers/char/mem.c  2011-11-16 17:01:34.000000000 +0100
751 @@ -87,6 +87,7 @@ void __weak unxlate_dev_mem_ptr(unsigned
752  {
753  }
754  
755 +#ifndef ARCH_HAS_DEV_MEM
756  /*
757   * This funcion reads the *physical* memory. The f_pos points directly to the
758   * memory location.
759 @@ -209,6 +210,7 @@ static ssize_t write_mem(struct file *fi
760         *ppos += written;
761         return written;
762  }
763 +#endif
764  
765  int __weak phys_mem_access_prot_allowed(struct file *file,
766         unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
767 @@ -335,6 +337,9 @@ static int mmap_mem(struct file *file, s
768  static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
769  {
770         unsigned long pfn;
771 +#ifdef CONFIG_XEN
772 +       unsigned long i, count;
773 +#endif
774  
775         /* Turn a kernel-virtual address into a physical page frame */
776         pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
777 @@ -349,6 +354,13 @@ static int mmap_kmem(struct file *file, 
778         if (!pfn_valid(pfn))
779                 return -EIO;
780  
781 +#ifdef CONFIG_XEN
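+       /*
+        * Reject ranges containing foreign or I/O pages: for those the
+        * pfn -> mfn -> pfn round trip does not map back to the original
+        * pfn.
+        */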
782 +       count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
783 +       for (i = 0; i < count; i++)
784 +               if ((pfn + i) != mfn_to_local_pfn(pfn_to_mfn(pfn + i)))
785 +                       return -EIO;
786 +#endif
787 +
788         vma->vm_pgoff = pfn;
789         return mmap_mem(file, vma);
790  }
791 @@ -740,6 +752,7 @@ static int open_port(struct inode * inod
792  #define open_kmem      open_mem
793  #define open_oldmem    open_mem
794  
795 +#ifndef ARCH_HAS_DEV_MEM
796  static const struct file_operations mem_fops = {
797         .llseek         = memory_lseek,
798         .read           = read_mem,
799 @@ -748,6 +761,9 @@ static const struct file_operations mem_
800         .open           = open_mem,
801         .get_unmapped_area = get_unmapped_area_mem,
802  };
803 +#else
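+/* The architecture provides its own mem_fops when ARCH_HAS_DEV_MEM is set. */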
804 +extern const struct file_operations mem_fops;
805 +#endif
806  
807  #ifdef CONFIG_DEVKMEM
808  static const struct file_operations kmem_fops = {
809 --- head-2012-01-06.orig/drivers/char/tpm/Makefile      2012-01-06 10:21:23.000000000 +0100
810 +++ head-2012-01-06/drivers/char/tpm/Makefile   2011-04-13 11:25:29.000000000 +0200
811 @@ -9,3 +9,5 @@ obj-$(CONFIG_TCG_TIS) += tpm_tis.o
812  obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
813  obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
814  obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
815 +obj-$(CONFIG_TCG_XEN) += tpm_xenu.o
816 +tpm_xenu-y = tpm_xen.o tpm_vtpm.o
817 --- head-2012-01-06.orig/drivers/char/tpm/tpm.h 2012-01-06 10:21:23.000000000 +0100
818 +++ head-2012-01-06/drivers/char/tpm/tpm.h      2011-09-07 14:38:52.000000000 +0200
819 @@ -115,6 +115,9 @@ struct tpm_chip {
820         struct dentry **bios_dir;
821  
822         struct list_head list;
823 +#ifdef CONFIG_XEN
824 +       void *priv;
825 +#endif
826         void (*release) (struct device *);
827  };
828  
829 @@ -279,6 +282,18 @@ struct tpm_cmd_t {
830  
831  ssize_t        tpm_getcap(struct device *, __be32, cap_t *, const char *);
832  
833 +#ifdef CONFIG_XEN
834 +static inline void *chip_get_private(const struct tpm_chip *chip)
835 +{
836 +       return chip->priv;
837 +}
838 +
839 +static inline void chip_set_private(struct tpm_chip *chip, void *priv)
840 +{
841 +       chip->priv = priv;
842 +}
843 +#endif
844 +
845  extern void tpm_get_timeouts(struct tpm_chip *);
846  extern void tpm_gen_interrupt(struct tpm_chip *);
847  extern void tpm_continue_selftest(struct tpm_chip *);
848 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
849 +++ head-2012-01-06/drivers/char/tpm/tpm_vtpm.c 2011-01-31 14:53:38.000000000 +0100
850 @@ -0,0 +1,542 @@
851 +/*
852 + * Copyright (C) 2006 IBM Corporation
853 + *
854 + * Authors:
855 + * Stefan Berger <stefanb@us.ibm.com>
856 + *
857 + * Generic device driver part for device drivers in a virtualized
858 + * environment.
859 + *
860 + * This program is free software; you can redistribute it and/or
861 + * modify it under the terms of the GNU General Public License as
862 + * published by the Free Software Foundation, version 2 of the
863 + * License.
864 + *
865 + */
866 +
867 +#include <asm/uaccess.h>
868 +#include <linux/list.h>
869 +#include <linux/device.h>
870 +#include <linux/interrupt.h>
871 +#include <linux/platform_device.h>
872 +#include "tpm.h"
873 +#include "tpm_vtpm.h"
874 +
875 +/* read status bits */
876 +enum {
877 +       STATUS_BUSY = 0x01,
878 +       STATUS_DATA_AVAIL = 0x02,
879 +       STATUS_READY = 0x04
880 +};
881 +
882 +struct transmission {
883 +       struct list_head next;
884 +
885 +       unsigned char *request;
886 +       size_t  request_len;
887 +       size_t  request_buflen;
888 +
889 +       unsigned char *response;
890 +       size_t  response_len;
891 +       size_t  response_buflen;
892 +
893 +       unsigned int flags;
894 +};
895 +
896 +enum {
897 +       TRANSMISSION_FLAG_WAS_QUEUED = 0x1
898 +};
899 +
900 +
901 +enum {
902 +       DATAEX_FLAG_QUEUED_ONLY = 0x1
903 +};
904 +
905 +
906 +/* local variables */
907 +
908 +/* local function prototypes */
909 +static int _vtpm_send_queued(struct tpm_chip *chip);
910 +
911 +
912 +/* =============================================================
913 + * Some utility functions
914 + * =============================================================
915 + */
916 +static void vtpm_state_init(struct vtpm_state *vtpms)
917 +{
918 +       vtpms->current_request = NULL;
919 +       spin_lock_init(&vtpms->req_list_lock);
920 +       init_waitqueue_head(&vtpms->req_wait_queue);
921 +       INIT_LIST_HEAD(&vtpms->queued_requests);
922 +
923 +       vtpms->current_response = NULL;
924 +       spin_lock_init(&vtpms->resp_list_lock);
925 +       init_waitqueue_head(&vtpms->resp_wait_queue);
926 +
927 +       vtpms->disconnect_time = jiffies;
928 +}
929 +
930 +
931 +static inline struct transmission *transmission_alloc(void)
932 +{
933 +       return kzalloc(sizeof(struct transmission), GFP_ATOMIC);
934 +}
935 +
936 +static unsigned char *
937 +transmission_set_req_buffer(struct transmission *t,
938 +                            unsigned char *buffer, size_t len)
939 +{
940 +       if (t->request_buflen < len) {
941 +               kfree(t->request);
942 +               t->request = kmalloc(len, GFP_KERNEL);
943 +               if (!t->request) {
944 +                       t->request_buflen = 0;
945 +                       return NULL;
946 +               }
947 +               t->request_buflen = len;
948 +       }
949 +
950 +       memcpy(t->request, buffer, len);
951 +       t->request_len = len;
952 +
953 +       return t->request;
954 +}
955 +
956 +static unsigned char *
957 +transmission_set_res_buffer(struct transmission *t,
958 +                            const unsigned char *buffer, size_t len)
959 +{
960 +       if (t->response_buflen < len) {
961 +               kfree(t->response);
962 +               t->response = kmalloc(len, GFP_ATOMIC);
963 +               if (!t->response) {
964 +                       t->response_buflen = 0;
965 +                       return NULL;
966 +               }
967 +               t->response_buflen = len;
968 +       }
969 +
970 +       memcpy(t->response, buffer, len);
971 +       t->response_len = len;
972 +
973 +       return t->response;
974 +}
975 +
976 +static inline void transmission_free(struct transmission *t)
977 +{
978 +       kfree(t->request);
979 +       kfree(t->response);
980 +       kfree(t);
981 +}
982 +
983 +/* =============================================================
984 + * Interface with the lower layer driver
985 + * =============================================================
986 + */
987 +/*
988 + * Lower layer uses this function to make a response available.
989 + */
990 +int vtpm_vd_recv(const struct tpm_chip *chip,
991 +                 const unsigned char *buffer, size_t count,
992 +                 void *ptr)
993 +{
994 +       unsigned long flags;
995 +       int ret_size = 0;
996 +       struct transmission *t;
997 +       struct vtpm_state *vtpms;
998 +
999 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1000 +
1001 +       /*
1002 +        * The request list must contain exactly one request,
1003 +        * and that element must be the one that was passed in
1004 +        * from the front-end.
1005 +        */
1006 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1007 +       if (vtpms->current_request != ptr) {
1008 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1009 +               return 0;
1010 +       }
1011 +
1012 +       if ((t = vtpms->current_request)) {
1013 +               transmission_free(t);
1014 +               vtpms->current_request = NULL;
1015 +       }
1016 +
1017 +       t = transmission_alloc();
1018 +       if (t) {
1019 +               if (!transmission_set_res_buffer(t, buffer, count)) {
1020 +                       transmission_free(t);
1021 +                       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1022 +                       return -ENOMEM;
1023 +               }
1024 +               ret_size = count;
1025 +               vtpms->current_response = t;
1026 +               wake_up_interruptible(&vtpms->resp_wait_queue);
1027 +       }
1028 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1029 +
1030 +       return ret_size;
1031 +}
1032 +
1033 +
1034 +/*
1035 + * Lower layer indicates its status (connected/disconnected)
1036 + */
1037 +void vtpm_vd_status(const struct tpm_chip *chip, u8 vd_status)
1038 +{
1039 +       struct vtpm_state *vtpms;
1040 +
1041 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1042 +
1043 +       vtpms->vd_status = vd_status;
1044 +       if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
1045 +               vtpms->disconnect_time = jiffies;
1046 +       }
1047 +}
1048 +
1049 +/* =============================================================
1050 + * Interface with the generic TPM driver
1051 + * =============================================================
1052 + */
1053 +static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
1054 +{
1055 +       int rc = 0;
1056 +       unsigned long flags;
1057 +       struct vtpm_state *vtpms;
1058 +
1059 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1060 +
1061 +       /*
1062 +        * Check if the previous operation only queued the command.
1063 +        * In this case there won't be a response, so just return
1064 +        * from here and reset that flag. In any other case a
1065 +        * response should arrive from the back-end.
1066 +        */
1067 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1068 +       if ((vtpms->flags & DATAEX_FLAG_QUEUED_ONLY) != 0) {
1069 +               vtpms->flags &= ~DATAEX_FLAG_QUEUED_ONLY;
1070 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1071 +               /*
1072 +                * The first few commands (measurements) must be
1073 +                * queued since it might not be possible to talk to the
1074 +                * TPM, yet.
1075 +                * Return a response of up to 30 '0's.
1076 +                */
1077 +
1078 +               count = min_t(size_t, count, 30);
1079 +               memset(buf, 0x0, count);
1080 +               return count;
1081 +       }
1082 +       /*
1083 +        * Check whether something is in the response list; if there's
1084 +        * nothing in the list, wait for something to appear.
1085 +        */
1086 +
1087 +       if (!vtpms->current_response) {
1088 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1089 +               interruptible_sleep_on_timeout(&vtpms->resp_wait_queue,
1090 +                                              1000);
1091 +               spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1092 +       }
1093 +
1094 +       if (vtpms->current_response) {
1095 +               struct transmission *t = vtpms->current_response;
1096 +               vtpms->current_response = NULL;
1097 +               rc = min(count, t->response_len);
1098 +               memcpy(buf, t->response, rc);
1099 +               transmission_free(t);
1100 +       }
1101 +
1102 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1103 +       return rc;
1104 +}
1105 +
1106 +static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
1107 +{
1108 +       int rc = 0;
1109 +       unsigned long flags;
1110 +       struct transmission *t = transmission_alloc();
1111 +       struct vtpm_state *vtpms;
1112 +
1113 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1114 +
1115 +       if (!t)
1116 +               return -ENOMEM;
1117 +       /*
1118 +        * If there's a current request, it must be the
1119 +        * previous request that has timed out.
1120 +        */
1121 +       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1122 +       if (vtpms->current_request != NULL) {
1123 +               printk("WARNING: Sending although there is a request outstanding.\n"
1124 +                      "         Previous request must have timed out.\n");
1125 +               transmission_free(vtpms->current_request);
1126 +               vtpms->current_request = NULL;
1127 +       }
1128 +       spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1129 +
1130 +       /*
1131 +        * Queue the packet if the driver below is not yet
1132 +        * ready, or if there is already a packet in the
1133 +        * queue.
1134 +        * If the driver below is ready, unqueue all
1135 +        * packets first before sending our current
1136 +        * packet.
1137 +        * For each unqueued packet, except for the
1138 +        * last (=current) packet, call the function
1139 +        * tpm_xen_recv to wait for the response to come
1140 +        * back.
1141 +        */
1142 +       if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
1143 +               if (time_after(jiffies,
1144 +                              vtpms->disconnect_time + HZ * 10)) {
1145 +                       rc = -ENOENT;
1146 +               } else {
1147 +                       goto queue_it;
1148 +               }
1149 +       } else {
1150 +               /*
1151 +                * Send all queued packets.
1152 +                */
1153 +               if (_vtpm_send_queued(chip) == 0) {
1154 +
1155 +                       vtpms->current_request = t;
1156 +
1157 +                       rc = vtpm_vd_send(vtpms->tpm_private,
1158 +                                         buf,
1159 +                                         count,
1160 +                                         t);
1161 +                       /*
1162 +                        * The generic TPM driver will call
1163 +                        * the function to receive the response.
1164 +                        */
1165 +                       if (rc < 0) {
1166 +                               vtpms->current_request = NULL;
1167 +                               goto queue_it;
1168 +                       }
1169 +               } else {
1170 +queue_it:
1171 +                       if (!transmission_set_req_buffer(t, buf, count)) {
1172 +                               transmission_free(t);
1173 +                               rc = -ENOMEM;
1174 +                               goto exit;
1175 +                       }
1176 +                       /*
1177 +                        * An error occurred. Don't even try
1178 +                        * to send the current request. Just
1179 +                        * queue it.
1180 +                        */
1181 +                       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1182 +                       vtpms->flags |= DATAEX_FLAG_QUEUED_ONLY;
1183 +                       list_add_tail(&t->next, &vtpms->queued_requests);
1184 +                       spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1185 +               }
1186 +       }
1187 +
1188 +exit:
1189 +       return rc;
1190 +}
1191 +
1192 +
1193 +/*
1194 + * Send all queued requests.
1195 + */
1196 +static int _vtpm_send_queued(struct tpm_chip *chip)
1197 +{
1198 +       int rc;
1199 +       int error = 0;
1200 +       unsigned long flags;
1201 +       unsigned char buffer[1];
1202 +       struct vtpm_state *vtpms;
1203 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1204 +
1205 +       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1206 +
1207 +       while (!list_empty(&vtpms->queued_requests)) {
1208 +               /*
1209 +                * Need to dequeue them.
1210 +                * Read the result into a dummy buffer.
1211 +                */
1212 +               struct transmission *qt = (struct transmission *)
1213 +                                         vtpms->queued_requests.next;
1214 +               list_del(&qt->next);
1215 +               vtpms->current_request = qt;
1216 +               spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1217 +
1218 +               rc = vtpm_vd_send(vtpms->tpm_private,
1219 +                                 qt->request,
1220 +                                 qt->request_len,
1221 +                                 qt);
1222 +
1223 +               if (rc < 0) {
1224 +                       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1225 +                       if ((qt = vtpms->current_request) != NULL) {
1226 +                               /*
1227 +                                * requeue it at the beginning
1228 +                                * of the list
1229 +                                */
1230 +                               list_add(&qt->next,
1231 +                                        &vtpms->queued_requests);
1232 +                       }
1233 +                       vtpms->current_request = NULL;
1234 +                       error = 1;
1235 +                       break;
1236 +               }
1237 +               /*
1238 +                * After this point qt is not valid anymore!
1239 +                * It is freed when the front-end is delivering
1240 +                * the data by calling tpm_recv
1241 +                */
1242 +               /*
1243 +                * Receive response into provided dummy buffer
1244 +                */
1245 +               rc = vtpm_recv(chip, buffer, sizeof(buffer));
1246 +               spin_lock_irqsave(&vtpms->req_list_lock, flags);
1247 +       }
1248 +
1249 +       spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1250 +
1251 +       return error;
1252 +}
1253 +
1254 +static void vtpm_cancel(struct tpm_chip *chip)
1255 +{
1256 +       unsigned long flags;
1257 +       struct vtpm_state *vtpms = (struct vtpm_state *)chip_get_private(chip);
1258 +
1259 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1260 +
1261 +       if (!vtpms->current_response && vtpms->current_request) {
1262 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1263 +               interruptible_sleep_on(&vtpms->resp_wait_queue);
1264 +               spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1265 +       }
1266 +
1267 +       if (vtpms->current_response) {
1268 +               struct transmission *t = vtpms->current_response;
1269 +               vtpms->current_response = NULL;
1270 +               transmission_free(t);
1271 +       }
1272 +
1273 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1274 +}
1275 +
1276 +static u8 vtpm_status(struct tpm_chip *chip)
1277 +{
1278 +       u8 rc = 0;
1279 +       unsigned long flags;
1280 +       struct vtpm_state *vtpms;
1281 +
1282 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1283 +
1284 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1285 +       /*
1286 +        * Data are available if:
1287 +        *  - there's a current response
1288 +        *  - the last packet was queued only (this is fake, but necessary to
1289 +        *      get the generic TPM layer to call the receive function.)
1290 +        */
1291 +       if (vtpms->current_response ||
1292 +           0 != (vtpms->flags & DATAEX_FLAG_QUEUED_ONLY)) {
1293 +               rc = STATUS_DATA_AVAIL;
1294 +       } else if (!vtpms->current_response && !vtpms->current_request) {
1295 +               rc = STATUS_READY;
1296 +       }
1297 +
1298 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1299 +       return rc;
1300 +}
1301 +
1302 +static struct file_operations vtpm_ops = {
1303 +       .owner = THIS_MODULE,
1304 +       .llseek = no_llseek,
1305 +       .open = tpm_open,
1306 +       .read = tpm_read,
1307 +       .write = tpm_write,
1308 +       .release = tpm_release,
1309 +};
1310 +
1311 +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
1312 +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
1313 +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
1314 +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
1315 +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
1316 +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
1317 +                  NULL);
1318 +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
1319 +static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel);
1320 +
1321 +static struct attribute *vtpm_attrs[] = {
1322 +       &dev_attr_pubek.attr,
1323 +       &dev_attr_pcrs.attr,
1324 +       &dev_attr_enabled.attr,
1325 +       &dev_attr_active.attr,
1326 +       &dev_attr_owned.attr,
1327 +       &dev_attr_temp_deactivated.attr,
1328 +       &dev_attr_caps.attr,
1329 +       &dev_attr_cancel.attr,
1330 +       NULL,
1331 +};
1332 +
1333 +static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs };
1334 +
1335 +#define TPM_LONG_TIMEOUT   (10 * 60 * HZ)
1336 +
1337 +static struct tpm_vendor_specific tpm_vtpm = {
1338 +       .recv = vtpm_recv,
1339 +       .send = vtpm_send,
1340 +       .cancel = vtpm_cancel,
1341 +       .status = vtpm_status,
1342 +       .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL,
1343 +       .req_complete_val  = STATUS_DATA_AVAIL,
1344 +       .req_canceled = STATUS_READY,
1345 +       .attr_group = &vtpm_attr_grp,
1346 +       .miscdev = {
1347 +               .fops = &vtpm_ops,
1348 +       },
1349 +       .duration = {
1350 +               TPM_LONG_TIMEOUT,
1351 +               TPM_LONG_TIMEOUT,
1352 +               TPM_LONG_TIMEOUT,
1353 +       },
1354 +};
1355 +
1356 +struct tpm_chip *init_vtpm(struct device *dev,
1357 +                           struct tpm_private *tp)
1358 +{
1359 +       long rc;
1360 +       struct tpm_chip *chip;
1361 +       struct vtpm_state *vtpms;
1362 +
1363 +       vtpms = kzalloc(sizeof(struct vtpm_state), GFP_KERNEL);
1364 +       if (!vtpms)
1365 +               return ERR_PTR(-ENOMEM);
1366 +
1367 +       vtpm_state_init(vtpms);
1368 +       vtpms->tpm_private = tp;
1369 +
1370 +       chip = tpm_register_hardware(dev, &tpm_vtpm);
1371 +       if (!chip) {
1372 +               rc = -ENODEV;
1373 +               goto err_free_mem;
1374 +       }
1375 +
1376 +       chip_set_private(chip, vtpms);
1377 +
1378 +       return chip;
1379 +
1380 +err_free_mem:
1381 +       kfree(vtpms);
1382 +
1383 +       return ERR_PTR(rc);
1384 +}
1385 +
1386 +void cleanup_vtpm(struct device *dev)
1387 +{
1388 +       struct tpm_chip *chip = dev_get_drvdata(dev);
1389 +       struct vtpm_state *vtpms = (struct vtpm_state*)chip_get_private(chip);
1390 +       tpm_remove_hardware(dev);
1391 +       kfree(vtpms);
1392 +}
1393 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1394 +++ head-2012-01-06/drivers/char/tpm/tpm_vtpm.h 2011-01-31 14:53:38.000000000 +0100
1395 @@ -0,0 +1,55 @@
1396 +#ifndef TPM_VTPM_H
1397 +#define TPM_VTPM_H
1398 +
1399 +struct tpm_chip;
1400 +struct tpm_private;
1401 +
1402 +struct vtpm_state {
1403 +       struct transmission *current_request;
1404 +       spinlock_t           req_list_lock;
1405 +       wait_queue_head_t    req_wait_queue;
1406 +
1407 +       struct list_head     queued_requests;
1408 +
1409 +       struct transmission *current_response;
1410 +       spinlock_t           resp_list_lock;
1411 +       wait_queue_head_t    resp_wait_queue;     // processes waiting for responses
1412 +
1413 +       u8                   vd_status;
1414 +       u8                   flags;
1415 +
1416 +       unsigned long        disconnect_time;
1417 +
1418 +       /*
1419 +        * The following is a private structure of the underlying
1420 +        * driver. It is passed as parameter in the send function.
1421 +        */
1422 +       struct tpm_private *tpm_private;
1423 +};
1424 +
1425 +
1426 +enum vdev_status {
1427 +       TPM_VD_STATUS_DISCONNECTED = 0x0,
1428 +       TPM_VD_STATUS_CONNECTED = 0x1
1429 +};
1430 +
1431 +/* this function is called from tpm_vtpm.c */
1432 +int vtpm_vd_send(struct tpm_private * tp,
1433 +                 const u8 * buf, size_t count, void *ptr);
1434 +
1435 +/* these functions are offered by tpm_vtpm.c */
1436 +struct tpm_chip *init_vtpm(struct device *,
1437 +                           struct tpm_private *);
1438 +void cleanup_vtpm(struct device *);
1439 +int vtpm_vd_recv(const struct tpm_chip* chip,
1440 +                 const unsigned char *buffer, size_t count, void *ptr);
1441 +void vtpm_vd_status(const struct tpm_chip *, u8 status);
1442 +
1443 +static inline struct tpm_private *tpm_private_from_dev(struct device *dev)
1444 +{
1445 +       struct tpm_chip *chip = dev_get_drvdata(dev);
1446 +       struct vtpm_state *vtpms = chip_get_private(chip);
1447 +       return vtpms->tpm_private;
1448 +}
1449 +
1450 +#endif
1451 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1452 +++ head-2012-01-06/drivers/char/tpm/tpm_xen.c  2011-12-21 10:02:58.000000000 +0100
1453 @@ -0,0 +1,720 @@
1454 +/*
1455 + * Copyright (c) 2005, IBM Corporation
1456 + *
1457 + * Author: Stefan Berger, stefanb@us.ibm.com
1458 + * Grant table support: Mahadevan Gomathisankaran
1459 + *
1460 + * This code has been derived from drivers/xen/netfront/netfront.c
1461 + *
1462 + * Copyright (c) 2002-2004, K A Fraser
1463 + *
1464 + * This program is free software; you can redistribute it and/or
1465 + * modify it under the terms of the GNU General Public License version 2
1466 + * as published by the Free Software Foundation; or, when distributed
1467 + * separately from the Linux kernel or incorporated into other
1468 + * software packages, subject to the following license:
1469 + *
1470 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1471 + * of this source file (the "Software"), to deal in the Software without
1472 + * restriction, including without limitation the rights to use, copy, modify,
1473 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1474 + * and to permit persons to whom the Software is furnished to do so, subject to
1475 + * the following conditions:
1476 + *
1477 + * The above copyright notice and this permission notice shall be included in
1478 + * all copies or substantial portions of the Software.
1479 + *
1480 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1481 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1482 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1483 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1484 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1485 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1486 + * IN THE SOFTWARE.
1487 + */
1488 +
1489 +#include <linux/errno.h>
1490 +#include <linux/err.h>
1491 +#include <linux/interrupt.h>
1492 +#include <linux/mutex.h>
1493 +#include <asm/uaccess.h>
1494 +#include <xen/evtchn.h>
1495 +#include <xen/interface/grant_table.h>
1496 +#include <xen/interface/io/tpmif.h>
1497 +#include <xen/gnttab.h>
1498 +#include <xen/xenbus.h>
1499 +#include "tpm.h"
1500 +#include "tpm_vtpm.h"
1501 +
1502 +#undef DEBUG
1503 +
1504 +/* local structures */
1505 +struct tpm_private {
1506 +       struct tpm_chip *chip;
1507 +
1508 +       tpmif_tx_interface_t *tx;
1509 +       atomic_t refcnt;
1510 +       unsigned int irq;
1511 +       u8 is_connected;
1512 +       u8 is_suspended;
1513 +
1514 +       spinlock_t tx_lock;
1515 +
1516 +       struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE];
1517 +
1518 +       atomic_t tx_busy;
1519 +       void *tx_remember;
1520 +
1521 +       domid_t backend_id;
1522 +       wait_queue_head_t wait_q;
1523 +
1524 +       struct xenbus_device *dev;
1525 +       int ring_ref;
1526 +};
1527 +
1528 +struct tx_buffer {
1529 +       unsigned int size;      // available space in data
1530 +       unsigned int len;       // used space in data
1531 +       unsigned char *data;    // pointer to a page
1532 +};
1533 +
1534 +
1535 +/* locally visible variables */
1536 +static grant_ref_t gref_head;
1537 +static struct tpm_private *my_priv;
1538 +
1539 +/* local function prototypes */
1540 +static irqreturn_t tpmif_int(int irq,
1541 +                             void *tpm_priv,
1542 +                             struct pt_regs *ptregs);
1543 +static void tpmif_rx_action(unsigned long unused);
1544 +static int tpmif_connect(struct xenbus_device *dev,
1545 +                         struct tpm_private *tp,
1546 +                         domid_t domid);
1547 +static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0);
1548 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp);
1549 +static void tpmif_free_tx_buffers(struct tpm_private *tp);
1550 +static void tpmif_set_connected_state(struct tpm_private *tp,
1551 +                                      u8 newstate);
1552 +static int tpm_xmit(struct tpm_private *tp,
1553 +                    const u8 * buf, size_t count, int userbuffer,
1554 +                    void *remember);
1555 +static void destroy_tpmring(struct tpm_private *tp);
1556 +void __exit tpmif_exit(void);
1557 +
1558 +#define DPRINTK(fmt, args...) \
1559 +    pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
1560 +#define IPRINTK(fmt, args...) \
1561 +    printk(KERN_INFO "xen_tpm_fr: " fmt, ##args)
1562 +#define WPRINTK(fmt, args...) \
1563 +    printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args)
1564 +
1565 +#define GRANT_INVALID_REF      0
1566 +
1567 +
1568 +static inline int
1569 +tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len,
1570 +               int isuserbuffer)
1571 +{
1572 +       int copied = len;
1573 +
1574 +       if (len > txb->size)
1575 +               copied = txb->size;
1576 +       if (isuserbuffer) {
1577 +               if (copy_from_user(txb->data, src, copied))
1578 +                       return -EFAULT;
1579 +       } else {
1580 +               memcpy(txb->data, src, copied);
1581 +       }
1582 +       txb->len = len;
1583 +       return copied;
1584 +}
1585 +
1586 +static inline struct tx_buffer *tx_buffer_alloc(void)
1587 +{
1588 +       struct tx_buffer *txb;
1589 +
1590 +       txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL);
1591 +       if (!txb)
1592 +               return NULL;
1593 +
1594 +       txb->len = 0;
1595 +       txb->size = PAGE_SIZE;
1596 +       txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
1597 +       if (txb->data == NULL) {
1598 +               kfree(txb);
1599 +               txb = NULL;
1600 +       }
1601 +
1602 +       return txb;
1603 +}
1604 +
1605 +
1606 +static inline void tx_buffer_free(struct tx_buffer *txb)
1607 +{
1608 +       if (txb) {
1609 +               free_page((long)txb->data);
1610 +               kfree(txb);
1611 +       }
1612 +}
1613 +
1614 +/**************************************************************
1615 + Utility function for the tpm_private structure
1616 +**************************************************************/
1617 +static void tpm_private_init(struct tpm_private *tp)
1618 +{
1619 +       spin_lock_init(&tp->tx_lock);
1620 +       init_waitqueue_head(&tp->wait_q);
1621 +       atomic_set(&tp->refcnt, 1);
1622 +}
1623 +
1624 +static void tpm_private_put(void)
1625 +{
1626 +       if (!atomic_dec_and_test(&my_priv->refcnt))
1627 +               return;
1628 +
1629 +       tpmif_free_tx_buffers(my_priv);
1630 +       kfree(my_priv);
1631 +       my_priv = NULL;
1632 +}
1633 +
1634 +static struct tpm_private *tpm_private_get(void)
1635 +{
1636 +       int err;
1637 +
1638 +       if (my_priv) {
1639 +               atomic_inc(&my_priv->refcnt);
1640 +               return my_priv;
1641 +       }
1642 +
1643 +       my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
1644 +       if (!my_priv)
1645 +               return NULL;
1646 +
1647 +       tpm_private_init(my_priv);
1648 +       err = tpmif_allocate_tx_buffers(my_priv);
1649 +       if (err < 0)
1650 +               tpm_private_put();
1651 +
1652 +       return my_priv;
1653 +}
1654 +
1655 +/**************************************************************
1656 +
1657 + The interface to let the tpm plugin register its callback
1658 + function and send data to another partition using this module
1659 +
1660 +**************************************************************/
1661 +
1662 +static DEFINE_MUTEX(suspend_lock);
1663 +/*
1664 + * Send data via this module by calling this function
1665 + */
1666 +int vtpm_vd_send(struct tpm_private *tp,
1667 +                 const u8 * buf, size_t count, void *ptr)
1668 +{
1669 +       int sent;
1670 +
1671 +       mutex_lock(&suspend_lock);
1672 +       sent = tpm_xmit(tp, buf, count, 0, ptr);
1673 +       mutex_unlock(&suspend_lock);
1674 +
1675 +       return sent;
1676 +}
1677 +
1678 +/**************************************************************
1679 + XENBUS support code
1680 +**************************************************************/
1681 +
1682 +static int setup_tpmring(struct xenbus_device *dev,
1683 +                         struct tpm_private *tp)
1684 +{
1685 +       tpmif_tx_interface_t *sring;
1686 +       int err;
1687 +
1688 +       tp->ring_ref = GRANT_INVALID_REF;
1689 +
1690 +       sring = (void *)__get_free_page(GFP_KERNEL);
1691 +       if (!sring) {
1692 +               xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
1693 +               return -ENOMEM;
1694 +       }
1695 +       tp->tx = sring;
1696 +
1697 +       err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx));
1698 +       if (err < 0) {
1699 +               free_page((unsigned long)sring);
1700 +               tp->tx = NULL;
1701 +               xenbus_dev_fatal(dev, err, "allocating grant reference");
1702 +               goto fail;
1703 +       }
1704 +       tp->ring_ref = err;
1705 +
1706 +       err = tpmif_connect(dev, tp, dev->otherend_id);
1707 +       if (err)
1708 +               goto fail;
1709 +
1710 +       return 0;
1711 +fail:
1712 +       destroy_tpmring(tp);
1713 +       return err;
1714 +}
1715 +
1716 +
1717 +static void destroy_tpmring(struct tpm_private *tp)
1718 +{
1719 +       tpmif_set_connected_state(tp, 0);
1720 +
1721 +       if (tp->ring_ref != GRANT_INVALID_REF) {
1722 +               gnttab_end_foreign_access(tp->ring_ref, (unsigned long)tp->tx);
1723 +               tp->ring_ref = GRANT_INVALID_REF;
1724 +               tp->tx = NULL;
1725 +       }
1726 +
1727 +       if (tp->irq)
1728 +               unbind_from_irqhandler(tp->irq, tp);
1729 +
1730 +       tp->irq = 0;
1731 +}
1732 +
1733 +
1734 +static int talk_to_backend(struct xenbus_device *dev,
1735 +                           struct tpm_private *tp)
1736 +{
1737 +       const char *message = NULL;
1738 +       int err;
1739 +       struct xenbus_transaction xbt;
1740 +
1741 +       err = setup_tpmring(dev, tp);
1742 +       if (err) {
1743 +               xenbus_dev_fatal(dev, err, "setting up ring");
1744 +               goto out;
1745 +       }
1746 +
1747 +again:
1748 +       err = xenbus_transaction_start(&xbt);
1749 +       if (err) {
1750 +               xenbus_dev_fatal(dev, err, "starting transaction");
1751 +               goto destroy_tpmring;
1752 +       }
1753 +
1754 +       err = xenbus_printf(xbt, dev->nodename,
1755 +                           "ring-ref","%u", tp->ring_ref);
1756 +       if (err) {
1757 +               message = "writing ring-ref";
1758 +               goto abort_transaction;
1759 +       }
1760 +
1761 +       err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
1762 +                           irq_to_evtchn_port(tp->irq));
1763 +       if (err) {
1764 +               message = "writing event-channel";
1765 +               goto abort_transaction;
1766 +       }
1767 +
1768 +       err = xenbus_transaction_end(xbt, 0);
1769 +       if (err == -EAGAIN)
1770 +               goto again;
1771 +       if (err) {
1772 +               xenbus_dev_fatal(dev, err, "completing transaction");
1773 +               goto destroy_tpmring;
1774 +       }
1775 +
1776 +       xenbus_switch_state(dev, XenbusStateConnected);
1777 +
1778 +       return 0;
1779 +
1780 +abort_transaction:
1781 +       xenbus_transaction_end(xbt, 1);
1782 +       if (message)
1783 +               xenbus_dev_error(dev, err, "%s", message);
1784 +destroy_tpmring:
1785 +       destroy_tpmring(tp);
1786 +out:
1787 +       return err;
1788 +}
1789 +
1790 +/**
1791 + * Callback received when the backend's state changes.
1792 + */
1793 +static void backend_changed(struct xenbus_device *dev,
1794 +                           enum xenbus_state backend_state)
1795 +{
1796 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1797 +       DPRINTK("\n");
1798 +
1799 +       switch (backend_state) {
1800 +       case XenbusStateInitialising:
1801 +       case XenbusStateInitWait:
1802 +       case XenbusStateInitialised:
1803 +       case XenbusStateReconfiguring:
1804 +       case XenbusStateReconfigured:
1805 +       case XenbusStateUnknown:
1806 +               break;
1807 +
1808 +       case XenbusStateConnected:
1809 +               tpmif_set_connected_state(tp, 1);
1810 +               break;
1811 +
1812 +       case XenbusStateClosing:
1813 +               tpmif_set_connected_state(tp, 0);
1814 +               xenbus_frontend_closed(dev);
1815 +               break;
1816 +
1817 +       case XenbusStateClosed:
1818 +               tpmif_set_connected_state(tp, 0);
1819 +               if (tp->is_suspended == 0)
1820 +                       device_unregister(&dev->dev);
1821 +               xenbus_frontend_closed(dev);
1822 +               break;
1823 +       }
1824 +}
1825 +
1826 +static int tpmfront_probe(struct xenbus_device *dev,
1827 +                          const struct xenbus_device_id *id)
1828 +{
1829 +       int err;
1830 +       int handle;
1831 +       struct tpm_private *tp = tpm_private_get();
1832 +
1833 +       if (!tp)
1834 +               return -ENOMEM;
1835 +
1836 +       tp->chip = init_vtpm(&dev->dev, tp);
1837 +       if (IS_ERR(tp->chip))
1838 +               return PTR_ERR(tp->chip);
1839 +
1840 +       err = xenbus_scanf(XBT_NIL, dev->nodename,
1841 +                          "handle", "%i", &handle);
1842 +       if (XENBUS_EXIST_ERR(err))
1843 +               return err;
1844 +
1845 +       if (err < 0) {
1846 +               xenbus_dev_fatal(dev,err,"reading virtual-device");
1847 +               return err;
1848 +       }
1849 +
1850 +       tp->dev = dev;
1851 +
1852 +       err = talk_to_backend(dev, tp);
1853 +       if (err) {
1854 +               tpm_private_put();
1855 +               return err;
1856 +       }
1857 +
1858 +       return 0;
1859 +}
1860 +
1861 +
1862 +static int tpmfront_remove(struct xenbus_device *dev)
1863 +{
1864 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1865 +       destroy_tpmring(tp);
1866 +       cleanup_vtpm(&dev->dev);
1867 +       return 0;
1868 +}
1869 +
1870 +static int tpmfront_suspend(struct xenbus_device *dev)
1871 +{
1872 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1873 +       u32 ctr;
1874 +
1875 +       /* Take the lock, preventing any application from sending. */
1876 +       mutex_lock(&suspend_lock);
1877 +       tp->is_suspended = 1;
1878 +
1879 +       for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) {
1880 +               if ((ctr % 10) == 0)
1881 +                       printk("TPM-FE [INFO]: Waiting for outstanding "
1882 +                              "request.\n");
1883 +               /* Wait for a request to be responded to. */
1884 +               interruptible_sleep_on_timeout(&tp->wait_q, 100);
1885 +       }
1886 +
1887 +       return 0;
1888 +}
1889 +
1890 +static int tpmfront_suspend_finish(struct tpm_private *tp)
1891 +{
1892 +       tp->is_suspended = 0;
1893 +       /* Allow applications to send again. */
1894 +       mutex_unlock(&suspend_lock);
1895 +       return 0;
1896 +}
1897 +
1898 +static int tpmfront_suspend_cancel(struct xenbus_device *dev)
1899 +{
1900 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1901 +       return tpmfront_suspend_finish(tp);
1902 +}
1903 +
1904 +static int tpmfront_resume(struct xenbus_device *dev)
1905 +{
1906 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1907 +       destroy_tpmring(tp);
1908 +       return talk_to_backend(dev, tp);
1909 +}
1910 +
1911 +static int tpmif_connect(struct xenbus_device *dev,
1912 +                         struct tpm_private *tp,
1913 +                         domid_t domid)
1914 +{
1915 +       int err;
1916 +
1917 +       tp->backend_id = domid;
1918 +
1919 +       err = bind_listening_port_to_irqhandler(
1920 +               domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp);
1921 +       if (err <= 0) {
1922 +               WPRINTK("bind_listening_port_to_irqhandler failed "
1923 +                       "(err=%d)\n", err);
1924 +               return err;
1925 +       }
1926 +       tp->irq = err;
1927 +
1928 +       return 0;
1929 +}
1930 +
1931 +static const struct xenbus_device_id tpmfront_ids[] = {
1932 +       { "vtpm" },
1933 +       { "" }
1934 +};
1935 +MODULE_ALIAS("xen:vtpm");
1936 +
1937 +static DEFINE_XENBUS_DRIVER(tpmfront, ,
1938 +       .probe = tpmfront_probe,
1939 +       .remove =  tpmfront_remove,
1940 +       .resume = tpmfront_resume,
1941 +       .otherend_changed = backend_changed,
1942 +       .suspend = tpmfront_suspend,
1943 +       .suspend_cancel = tpmfront_suspend_cancel,
1944 +);
1945 +
1946 +static int __init init_tpm_xenbus(void)
1947 +{
1948 +       return xenbus_register_frontend(&tpmfront_driver);
1949 +}
1950 +
1951 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp)
1952 +{
1953 +       unsigned int i;
1954 +
1955 +       for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
1956 +               tp->tx_buffers[i] = tx_buffer_alloc();
1957 +               if (!tp->tx_buffers[i]) {
1958 +                       tpmif_free_tx_buffers(tp);
1959 +                       return -ENOMEM;
1960 +               }
1961 +       }
1962 +       return 0;
1963 +}
1964 +
1965 +static void tpmif_free_tx_buffers(struct tpm_private *tp)
1966 +{
1967 +       unsigned int i;
1968 +
1969 +       for (i = 0; i < TPMIF_TX_RING_SIZE; i++)
1970 +               tx_buffer_free(tp->tx_buffers[i]);
1971 +}
1972 +
1973 +static void tpmif_rx_action(unsigned long priv)
1974 +{
1975 +       struct tpm_private *tp = (struct tpm_private *)priv;
1976 +       int i = 0;
1977 +       unsigned int received;
1978 +       unsigned int offset = 0;
1979 +       u8 *buffer;
1980 +       tpmif_tx_request_t *tx = &tp->tx->ring[i].req;
1981 +
1982 +       atomic_set(&tp->tx_busy, 0);
1983 +       wake_up_interruptible(&tp->wait_q);
1984 +
1985 +       received = tx->size;
1986 +
1987 +       buffer = kmalloc(received, GFP_ATOMIC);
1988 +       if (!buffer)
1989 +               return;
1990 +
1991 +       for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) {
1992 +               struct tx_buffer *txb = tp->tx_buffers[i];
1993 +               tpmif_tx_request_t *tx;
1994 +               unsigned int tocopy;
1995 +
1996 +               tx = &tp->tx->ring[i].req;
1997 +               tocopy = tx->size;
1998 +               if (tocopy > PAGE_SIZE)
1999 +                       tocopy = PAGE_SIZE;
2000 +
2001 +               memcpy(&buffer[offset], txb->data, tocopy);
2002 +
2003 +               gnttab_release_grant_reference(&gref_head, tx->ref);
2004 +
2005 +               offset += tocopy;
2006 +       }
2007 +
2008 +       vtpm_vd_recv(tp->chip, buffer, received, tp->tx_remember);
2009 +       kfree(buffer);
2010 +}
2011 +
2012 +
2013 +static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
2014 +{
2015 +       struct tpm_private *tp = tpm_priv;
2016 +       unsigned long flags;
2017 +
2018 +       spin_lock_irqsave(&tp->tx_lock, flags);
2019 +       tpmif_rx_tasklet.data = (unsigned long)tp;
2020 +       tasklet_schedule(&tpmif_rx_tasklet);
2021 +       spin_unlock_irqrestore(&tp->tx_lock, flags);
2022 +
2023 +       return IRQ_HANDLED;
2024 +}
2025 +
2026 +
2027 +static int tpm_xmit(struct tpm_private *tp,
2028 +                    const u8 * buf, size_t count, int isuserbuffer,
2029 +                    void *remember)
2030 +{
2031 +       tpmif_tx_request_t *tx;
2032 +       TPMIF_RING_IDX i;
2033 +       unsigned int offset = 0;
2034 +
2035 +       spin_lock_irq(&tp->tx_lock);
2036 +
2037 +       if (unlikely(atomic_read(&tp->tx_busy))) {
2038 +               printk("tpm_xmit: There's an outstanding request/response "
2039 +                      "on the way!\n");
2040 +               spin_unlock_irq(&tp->tx_lock);
2041 +               return -EBUSY;
2042 +       }
2043 +
2044 +       if (tp->is_connected != 1) {
2045 +               spin_unlock_irq(&tp->tx_lock);
2046 +               return -EIO;
2047 +       }
2048 +
2049 +       for (i = 0; count > 0 && i < TPMIF_TX_RING_SIZE; i++) {
2050 +               struct tx_buffer *txb = tp->tx_buffers[i];
2051 +               int copied;
2052 +
2053 +               if (!txb) {
2054 +                       DPRINTK("txb (i=%d) is NULL. buffers initialized?\n"
2055 +                               "Not transmitting anything!\n", i);
2056 +                       spin_unlock_irq(&tp->tx_lock);
2057 +                       return -EFAULT;
2058 +               }
2059 +
2060 +               copied = tx_buffer_copy(txb, &buf[offset], count,
2061 +                                       isuserbuffer);
2062 +               if (copied < 0) {
2063 +                       /* An error occurred */
2064 +                       spin_unlock_irq(&tp->tx_lock);
2065 +                       return copied;
2066 +               }
2067 +               count -= copied;
2068 +               offset += copied;
2069 +
2070 +               tx = &tp->tx->ring[i].req;
2071 +               tx->addr = virt_to_machine(txb->data);
2072 +               tx->size = txb->len;
2073 +               tx->unused = 0;
2074 +
2075 +               DPRINTK("First 4 characters sent by TPM-FE are "
2076 +                       "0x%02x 0x%02x 0x%02x 0x%02x\n",
2077 +                       txb->data[0],txb->data[1],txb->data[2],txb->data[3]);
2078 +
2079 +               /* Get the granttable reference for this page. */
2080 +               tx->ref = gnttab_claim_grant_reference(&gref_head);
2081 +               if (tx->ref == -ENOSPC) {
2082 +                       spin_unlock_irq(&tp->tx_lock);
2083 +                       DPRINTK("Grant table claim reference failed in "
2084 +                               "func:%s line:%d file:%s\n",
2085 +                               __FUNCTION__, __LINE__, __FILE__);
2086 +                       return -ENOSPC;
2087 +               }
2088 +               gnttab_grant_foreign_access_ref(tx->ref,
2089 +                                               tp->backend_id,
2090 +                                               virt_to_mfn(txb->data),
2091 +                                               0 /*RW*/);
2092 +               wmb();
2093 +       }
2094 +
2095 +       atomic_set(&tp->tx_busy, 1);
2096 +       tp->tx_remember = remember;
2097 +
2098 +       mb();
2099 +
2100 +       notify_remote_via_irq(tp->irq);
2101 +
2102 +       spin_unlock_irq(&tp->tx_lock);
2103 +       return offset;
2104 +}
2105 +
2106 +
2107 +static void tpmif_notify_upperlayer(struct tpm_private *tp)
2108 +{
2109 +       /* Notify upper layer about the state of the connection to the BE. */
2110 +       vtpm_vd_status(tp->chip, (tp->is_connected
2111 +                                 ? TPM_VD_STATUS_CONNECTED
2112 +                                 : TPM_VD_STATUS_DISCONNECTED));
2113 +}
2114 +
2115 +
2116 +static void tpmif_set_connected_state(struct tpm_private *tp, u8 is_connected)
2117 +{
2118 +       /*
2119 +        * Don't notify upper layer if we are in suspend mode and
2120 +        * should disconnect - assumption is that we will resume.
2121 +        * The mutex keeps apps from sending.
2122 +        */
2123 +       if (is_connected == 0 && tp->is_suspended == 1)
2124 +               return;
2125 +
2126 +       /*
2127 +        * Unlock the mutex if we are connected again
2128 +        * after being suspended - now resuming.
2129 +        * This also removes the suspend state.
2130 +        */
2131 +       if (is_connected == 1 && tp->is_suspended == 1)
2132 +               tpmfront_suspend_finish(tp);
2133 +
2134 +       if (is_connected != tp->is_connected) {
2135 +               tp->is_connected = is_connected;
2136 +               tpmif_notify_upperlayer(tp);
2137 +       }
2138 +}
2139 +
2140 +
2141 +
2142 +/* =================================================================
2143 + * Initialization function.
2144 + * =================================================================
2145 + */
2146 +
2147 +
2148 +static int __init tpmif_init(void)
2149 +{
2150 +       struct tpm_private *tp;
2151 +
2152 +       if (is_initial_xendomain())
2153 +               return -EPERM;
2154 +
2155 +       tp = tpm_private_get();
2156 +       if (!tp)
2157 +               return -ENOMEM;
2158 +
2159 +       IPRINTK("Initialising the vTPM driver.\n");
2160 +       if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE,
2161 +                                         &gref_head) < 0) {
2162 +               tpm_private_put();
2163 +               return -EFAULT;
2164 +       }
2165 +
2166 +       init_tpm_xenbus();
2167 +       return 0;
2168 +}
2169 +
2170 +
2171 +module_init(tpmif_init);
2172 +
2173 +MODULE_LICENSE("Dual BSD/GPL");
2174 --- head-2012-01-06.orig/drivers/edac/edac_mc.c 2012-01-06 10:21:23.000000000 +0100
2175 +++ head-2012-01-06/drivers/edac/edac_mc.c      2011-06-30 15:37:01.000000000 +0200
2176 @@ -611,6 +611,10 @@ static void edac_mc_scrub_block(unsigned
2177  
2178         debugf3("%s()\n", __func__);
2179  
2180 +#ifdef CONFIG_XEN
2181 +       page = mfn_to_local_pfn(page);
2182 +#endif
2183 +
2184         /* ECC error page was not in our memory. Ignore it. */
2185         if (!pfn_valid(page))
2186                 return;
2187 --- head-2012-01-06.orig/drivers/firmware/dell_rbu.c    2012-01-06 10:21:23.000000000 +0100
2188 +++ head-2012-01-06/drivers/firmware/dell_rbu.c 2011-04-13 11:25:29.000000000 +0200
2189 @@ -170,9 +170,28 @@ static int create_packet(void *data, siz
2190                         spin_lock(&rbu_data.lock);
2191                         goto out_alloc_packet_array;
2192                 }
2193 +#ifdef CONFIG_XEN
2194 +               if (ordernum && xen_create_contiguous_region(
2195 +                       (unsigned long)packet_data_temp_buf, ordernum, 0)) {
2196 +                       free_pages((unsigned long)packet_data_temp_buf,
2197 +                                  ordernum);
2198 +                       printk(KERN_WARNING
2199 +                               "dell_rbu:%s: failed to adjust new "
2200 +                               "packet\n", __func__);
2201 +                       retval = -ENOMEM;
2202 +                       spin_lock(&rbu_data.lock);
2203 +                       goto out_alloc_packet_array;
2204 +               }
2205 +#endif
2206  
2207 -               if ((unsigned long)virt_to_phys(packet_data_temp_buf)
2208 +               if ((unsigned long)virt_to_bus(packet_data_temp_buf)
2209                                 < allocation_floor) {
2210 +#ifdef CONFIG_XEN
2211 +                       if (ordernum)
2212 +                               xen_destroy_contiguous_region(
2213 +                                       (unsigned long)packet_data_temp_buf,
2214 +                                       ordernum);
2215 +#endif
2216                         pr_debug("packet 0x%lx below floor at 0x%lx.\n",
2217                                         (unsigned long)virt_to_phys(
2218                                                 packet_data_temp_buf),
2219 @@ -186,7 +205,7 @@ static int create_packet(void *data, siz
2220         newpacket->data = packet_data_temp_buf;
2221  
2222         pr_debug("create_packet: newpacket at physical addr %lx\n",
2223 -               (unsigned long)virt_to_phys(newpacket->data));
2224 +               (unsigned long)virt_to_bus(newpacket->data));
2225  
2226         /* packets may not have fixed size */
2227         newpacket->length = length;
2228 @@ -205,7 +224,7 @@ out_alloc_packet_array:
2229         /* always free packet array */
2230         for (;idx>0;idx--) {
2231                 pr_debug("freeing unused packet below floor 0x%lx.\n",
2232 -                       (unsigned long)virt_to_phys(
2233 +                       (unsigned long)virt_to_bus(
2234                                 invalid_addr_packet_array[idx-1]));
2235                 free_pages((unsigned long)invalid_addr_packet_array[idx-1],
2236                         ordernum);
2237 @@ -349,6 +368,13 @@ static void packet_empty_list(void)
2238                  * to make sure there are no stale RBU packets left in memory
2239                  */
2240                 memset(newpacket->data, 0, rbu_data.packetsize);
2241 +#ifdef CONFIG_XEN
2242 +               if (newpacket->ordernum)
2243 +                       xen_destroy_contiguous_region(
2244 +                               (unsigned long)newpacket->data,
2245 +                               newpacket->ordernum);
2246 +#endif
2247 +
2248                 free_pages((unsigned long) newpacket->data,
2249                         newpacket->ordernum);
2250                 kfree(newpacket);
2251 @@ -403,7 +429,9 @@ static int img_update_realloc(unsigned l
2252  {
2253         unsigned char *image_update_buffer = NULL;
2254         unsigned long rc;
2255 +#ifndef CONFIG_XEN
2256         unsigned long img_buf_phys_addr;
2257 +#endif
2258         int ordernum;
2259         int dma_alloc = 0;
2260  
2261 @@ -434,15 +462,19 @@ static int img_update_realloc(unsigned l
2262  
2263         spin_unlock(&rbu_data.lock);
2264  
2265 +#ifndef CONFIG_XEN
2266         ordernum = get_order(size);
2267         image_update_buffer =
2268                 (unsigned char *) __get_free_pages(GFP_KERNEL, ordernum);
2269  
2270         img_buf_phys_addr =
2271 -               (unsigned long) virt_to_phys(image_update_buffer);
2272 +               (unsigned long) virt_to_bus(image_update_buffer);
2273  
2274         if (img_buf_phys_addr > BIOS_SCAN_LIMIT) {
2275                 free_pages((unsigned long) image_update_buffer, ordernum);
2276 +#else
2277 +       {
2278 +#endif
2279                 ordernum = -1;
2280                 image_update_buffer = dma_alloc_coherent(NULL, size,
2281                         &dell_rbu_dmaaddr, GFP_KERNEL);
2282 @@ -695,6 +727,12 @@ static struct bin_attribute rbu_packet_s
2283  static int __init dcdrbu_init(void)
2284  {
2285         int rc;
2286 +
2287 +#ifdef CONFIG_XEN
2288 +       if (!is_initial_xendomain())
2289 +               return -ENODEV;
2290 +#endif
2291 +
2292         spin_lock_init(&rbu_data.lock);
2293  
2294         init_packet_head();
2295 --- head-2012-01-06.orig/drivers/ide/ide-lib.c  2012-01-06 10:21:23.000000000 +0100
2296 +++ head-2012-01-06/drivers/ide/ide-lib.c       2011-11-16 17:01:48.000000000 +0100
2297 @@ -19,12 +19,12 @@ void ide_toggle_bounce(ide_drive_t *driv
2298  {
2299         u64 addr = BLK_BOUNCE_HIGH;     /* dma64_addr_t */
2300  
2301 -       if (!PCI_DMA_BUS_IS_PHYS) {
2302 -               addr = BLK_BOUNCE_ANY;
2303 -       } else if (on && drive->media == ide_disk) {
2304 +       if (on && drive->media == ide_disk) {
2305                 struct device *dev = drive->hwif->dev;
2306  
2307 -               if (dev && dev->dma_mask)
2308 +               if (!PCI_DMA_BUS_IS_PHYS)
2309 +                       addr = BLK_BOUNCE_ANY;
2310 +               else if (dev && dev->dma_mask)
2311                         addr = *dev->dma_mask;
2312         }
2313  
2314 --- head-2012-01-06.orig/drivers/oprofile/buffer_sync.c 2012-01-06 10:21:23.000000000 +0100
2315 +++ head-2012-01-06/drivers/oprofile/buffer_sync.c      2011-06-30 15:37:10.000000000 +0200
2316 @@ -8,6 +8,10 @@
2317   * @author Barry Kasindorf
2318   * @author Robert Richter <robert.richter@amd.com>
2319   *
2320 + * Modified by Aravind Menon for Xen
2321 + * These modifications are:
2322 + * Copyright (C) 2005 Hewlett-Packard Co.
2323 + *
2324   * This is the core of the buffer management. Each
2325   * CPU buffer is processed and entered into the
2326   * global event buffer. Such processing is necessary
2327 @@ -43,6 +47,8 @@ static cpumask_var_t marked_cpus;
2328  static DEFINE_SPINLOCK(task_mortuary);
2329  static void process_task_mortuary(void);
2330  
2331 +static int cpu_current_domain[NR_CPUS];
2332 +
2333  /* Take ownership of the task struct and place it on the
2334   * list for processing. Only after two full buffer syncs
2335   * does the task eventually get freed, because by then
2336 @@ -61,7 +67,6 @@ task_free_notify(struct notifier_block *
2337         return NOTIFY_OK;
2338  }
2339  
2340 -
2341  /* The task is on its way out. A sync of the buffer means we can catch
2342   * any remaining samples for this task.
2343   */
2344 @@ -151,6 +156,11 @@ static void free_all_tasks(void)
2345  int sync_start(void)
2346  {
2347         int err;
2348 +       int i;
2349 +
2350 +       for (i = 0; i < NR_CPUS; i++) {
2351 +               cpu_current_domain[i] = COORDINATOR_DOMAIN;
2352 +       }
2353  
2354         if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
2355                 return -ENOMEM;
2356 @@ -287,13 +297,29 @@ static void add_cpu_switch(int i)
2357         last_cookie = INVALID_COOKIE;
2358  }
2359  
2360 -static void add_kernel_ctx_switch(unsigned int in_kernel)
2361 +static void add_cpu_mode_switch(unsigned int cpu_mode)
2362  {
2363         add_event_entry(ESCAPE_CODE);
2364 -       if (in_kernel)
2365 +       switch (cpu_mode) {
2366 +       case CPU_MODE_USER:
2367 +               add_event_entry(USER_ENTER_SWITCH_CODE);
2368 +               break;
2369 +       case CPU_MODE_KERNEL:
2370                 add_event_entry(KERNEL_ENTER_SWITCH_CODE);
2371 -       else
2372 -               add_event_entry(KERNEL_EXIT_SWITCH_CODE);
2373 +               break;
2374 +       case CPU_MODE_XEN:
2375 +               add_event_entry(XEN_ENTER_SWITCH_CODE);
2376 +               break;
2377 +       default:
2378 +               break;
2379 +       }
2380 +}
2381 +
2382 +static void add_domain_switch(unsigned long domain_id)
2383 +{
2384 +       add_event_entry(ESCAPE_CODE);
2385 +       add_event_entry(DOMAIN_SWITCH_CODE);
2386 +       add_event_entry(domain_id);
2387  }
2388  
2389  static void
2390 @@ -374,12 +400,12 @@ static inline void add_sample_entry(unsi
2391   * for later lookup from userspace. Return 0 on failure.
2392   */
2393  static int
2394 -add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
2395 +add_sample(struct mm_struct *mm, struct op_sample *s, int cpu_mode)
2396  {
2397         unsigned long cookie;
2398         off_t offset;
2399  
2400 -       if (in_kernel) {
2401 +       if (cpu_mode >= CPU_MODE_KERNEL) {
2402                 add_sample_entry(s->eip, s->event);
2403                 return 1;
2404         }
2405 @@ -504,9 +530,10 @@ void sync_buffer(int cpu)
2406         unsigned long val;
2407         struct task_struct *new;
2408         unsigned long cookie = 0;
2409 -       int in_kernel = 1;
2410 +       int cpu_mode = CPU_MODE_KERNEL;
2411         sync_buffer_state state = sb_buffer_start;
2412         unsigned int i;
2413 +       int domain_switch = 0;
2414         unsigned long available;
2415         unsigned long flags;
2416         struct op_entry entry;
2417 @@ -516,6 +543,11 @@ void sync_buffer(int cpu)
2418  
2419         add_cpu_switch(cpu);
2420  
2421 +       /* We need to assign the first samples in this CPU buffer to the
2422 +          same domain that we were processing at the last sync_buffer */
2423 +       if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN)
2424 +               add_domain_switch(cpu_current_domain[cpu]);
2425 +
2426         op_cpu_buffer_reset(cpu);
2427         available = op_cpu_buffer_entries(cpu);
2428  
2429 @@ -524,6 +556,13 @@ void sync_buffer(int cpu)
2430                 if (!sample)
2431                         break;
2432  
2433 +               if (domain_switch) {
2434 +                       cpu_current_domain[cpu] = sample->eip;
2435 +                       add_domain_switch(sample->eip);
2436 +                       domain_switch = 0;
2437 +                       continue;
2438 +               }
2439 +
2440                 if (is_code(sample->eip)) {
2441                         flags = sample->event;
2442                         if (flags & TRACE_BEGIN) {
2443 @@ -532,10 +571,10 @@ void sync_buffer(int cpu)
2444                         }
2445                         if (flags & KERNEL_CTX_SWITCH) {
2446                                 /* kernel/userspace switch */
2447 -                               in_kernel = flags & IS_KERNEL;
2448 +                               cpu_mode = flags & CPU_MODE_MASK;
2449                                 if (state == sb_buffer_start)
2450                                         state = sb_sample_start;
2451 -                               add_kernel_ctx_switch(flags & IS_KERNEL);
2452 +                               add_cpu_mode_switch(cpu_mode);
2453                         }
2454                         if (flags & USER_CTX_SWITCH
2455                             && op_cpu_buffer_get_data(&entry, &val)) {
2456 @@ -548,16 +587,23 @@ void sync_buffer(int cpu)
2457                                         cookie = get_exec_dcookie(mm);
2458                                 add_user_ctx_switch(new, cookie);
2459                         }
2460 +                       if (flags & DOMAIN_SWITCH)
2461 +                               domain_switch = 1;
2462                         if (op_cpu_buffer_get_size(&entry))
2463                                 add_data(&entry, mm);
2464                         continue;
2465                 }
2466  
2467 +               if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
2468 +                       add_sample_entry(sample->eip, sample->event);
2469 +                       continue;
2470 +               }
2471 +
2472                 if (state < sb_bt_start)
2473                         /* ignore sample */
2474                         continue;
2475  
2476 -               if (add_sample(mm, sample, in_kernel))
2477 +               if (add_sample(mm, sample, cpu_mode))
2478                         continue;
2479  
2480                 /* ignore backtraces if failed to add a sample */
2481 @@ -568,6 +614,10 @@ void sync_buffer(int cpu)
2482         }
2483         release_mm(mm);
2484  
2485 +       /* We reset domain to COORDINATOR at each CPU switch */
2486 +       if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN)
2487 +               add_domain_switch(COORDINATOR_DOMAIN);
2488 +
2489         mark_done(cpu);
2490  
2491         mutex_unlock(&buffer_mutex);
2492 --- head-2012-01-06.orig/drivers/oprofile/cpu_buffer.c  2012-01-06 10:21:23.000000000 +0100
2493 +++ head-2012-01-06/drivers/oprofile/cpu_buffer.c       2011-04-13 11:25:29.000000000 +0200
2494 @@ -8,6 +8,10 @@
2495   * @author Barry Kasindorf <barry.kasindorf@amd.com>
2496   * @author Robert Richter <robert.richter@amd.com>
2497   *
2498 + * Modified by Aravind Menon for Xen
2499 + * These modifications are:
2500 + * Copyright (C) 2005 Hewlett-Packard Co.
2501 + *
2502   * Each CPU has a local buffer that stores PC value/event
2503   * pairs. We also log context switches when we notice them.
2504   * Eventually each CPU's buffer is processed into the global
2505 @@ -38,6 +42,8 @@ static void wq_sync_buffer(struct work_s
2506  #define DEFAULT_TIMER_EXPIRE (HZ / 10)
2507  static int work_enabled;
2508  
2509 +static int32_t current_domain = COORDINATOR_DOMAIN;
2510 +
2511  unsigned long oprofile_get_cpu_buffer_size(void)
2512  {
2513         return oprofile_cpu_buffer_size;
2514 @@ -75,7 +81,7 @@ int alloc_cpu_buffers(void)
2515                 struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
2516  
2517                 b->last_task = NULL;
2518 -               b->last_is_kernel = -1;
2519 +               b->last_cpu_mode = -1;
2520                 b->tracing = 0;
2521                 b->buffer_size = buffer_size;
2522                 b->sample_received = 0;
2523 @@ -180,7 +186,7 @@ unsigned long op_cpu_buffer_entries(int 
2524  
2525  static int
2526  op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
2527 -           int is_kernel, struct task_struct *task)
2528 +           int cpu_mode, struct task_struct *task)
2529  {
2530         struct op_entry entry;
2531         struct op_sample *sample;
2532 @@ -193,16 +199,15 @@ op_add_code(struct oprofile_cpu_buffer *
2533                 flags |= TRACE_BEGIN;
2534  
2535         /* notice a switch from user->kernel or vice versa */
2536 -       is_kernel = !!is_kernel;
2537 -       if (cpu_buf->last_is_kernel != is_kernel) {
2538 -               cpu_buf->last_is_kernel = is_kernel;
2539 -               flags |= KERNEL_CTX_SWITCH;
2540 -               if (is_kernel)
2541 -                       flags |= IS_KERNEL;
2542 +       if (cpu_buf->last_cpu_mode != cpu_mode) {
2543 +               cpu_buf->last_cpu_mode = cpu_mode;
2544 +               flags |= KERNEL_CTX_SWITCH | cpu_mode;
2545         }
2546  
2547         /* notice a task switch */
2548 -       if (cpu_buf->last_task != task) {
2549 +       /* if not processing other domain samples */
2550 +       if (cpu_buf->last_task != task &&
2551 +           current_domain == COORDINATOR_DOMAIN) {
2552                 cpu_buf->last_task = task;
2553                 flags |= USER_CTX_SWITCH;
2554         }
2555 @@ -251,14 +256,14 @@ op_add_sample(struct oprofile_cpu_buffer
2556  /*
2557   * This must be safe from any context.
2558   *
2559 - * is_kernel is needed because on some architectures you cannot
2560 + * cpu_mode is needed because on some architectures you cannot
2561   * tell if you are in kernel or user space simply by looking at
2562 - * pc. We tag this in the buffer by generating kernel enter/exit
2563 - * events whenever is_kernel changes
2564 + * pc. We tag this in the buffer by generating kernel/user (and
2565 + * xen) enter events whenever cpu_mode changes
2566   */
2567  static int
2568  log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
2569 -          unsigned long backtrace, int is_kernel, unsigned long event,
2570 +          unsigned long backtrace, int cpu_mode, unsigned long event,
2571            struct task_struct *task)
2572  {
2573         struct task_struct *tsk = task ? task : current;
2574 @@ -269,7 +274,7 @@ log_sample(struct oprofile_cpu_buffer *c
2575                 return 0;
2576         }
2577  
2578 -       if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
2579 +       if (op_add_code(cpu_buf, backtrace, cpu_mode, tsk))
2580                 goto fail;
2581  
2582         if (op_add_sample(cpu_buf, pc, event))
2583 @@ -440,6 +445,25 @@ fail:
2584         return;
2585  }
2586  
2587 +int oprofile_add_domain_switch(int32_t domain_id)
2588 +{
2589 +       struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
2590 +
2591 +       /* should have space for switching into and out of domain
2592 +          (2 slots each) plus one sample and one cpu mode switch */
2593 +       if (((nr_available_slots(cpu_buf) < 6) &&
2594 +            (domain_id != COORDINATOR_DOMAIN)) ||
2595 +           (nr_available_slots(cpu_buf) < 2))
2596 +               return 0;
2597 +
2598 +       add_code(cpu_buf, DOMAIN_SWITCH);
2599 +       add_sample(cpu_buf, domain_id, 0);
2600 +
2601 +       current_domain = domain_id;
2602 +
2603 +       return 1;
2604 +}
2605 +
2606  /*
2607   * This serves to avoid cpu buffer overflow, and makes sure
2608   * the task mortuary progresses
2609 --- head-2012-01-06.orig/drivers/oprofile/cpu_buffer.h  2012-01-06 10:21:23.000000000 +0100
2610 +++ head-2012-01-06/drivers/oprofile/cpu_buffer.h       2011-04-13 11:25:29.000000000 +0200
2611 @@ -41,7 +41,7 @@ struct op_entry;
2612  struct oprofile_cpu_buffer {
2613         unsigned long buffer_size;
2614         struct task_struct *last_task;
2615 -       int last_is_kernel;
2616 +       int last_cpu_mode;
2617         int tracing;
2618         unsigned long sample_received;
2619         unsigned long sample_lost_overflow;
2620 @@ -63,7 +63,7 @@ static inline void op_cpu_buffer_reset(i
2621  {
2622         struct oprofile_cpu_buffer *cpu_buf = &per_cpu(op_cpu_buffer, cpu);
2623  
2624 -       cpu_buf->last_is_kernel = -1;
2625 +       cpu_buf->last_cpu_mode = -1;
2626         cpu_buf->last_task = NULL;
2627  }
2628  
2629 @@ -113,9 +113,13 @@ int op_cpu_buffer_get_data(struct op_ent
2630  }
2631  
2632  /* extra data flags */
2633 -#define KERNEL_CTX_SWITCH      (1UL << 0)
2634 -#define IS_KERNEL              (1UL << 1)
2635 +#define CPU_MODE_USER          0
2636 +#define CPU_MODE_KERNEL                1
2637 +#define CPU_MODE_XEN           2
2638 +#define CPU_MODE_MASK          3
2639  #define TRACE_BEGIN            (1UL << 2)
2640  #define USER_CTX_SWITCH                (1UL << 3)
2641 +#define KERNEL_CTX_SWITCH      (1UL << 4)
2642 +#define DOMAIN_SWITCH          (1UL << 5)
2643  
2644  #endif /* OPROFILE_CPU_BUFFER_H */
2645 --- head-2012-01-06.orig/drivers/oprofile/event_buffer.h        2012-01-06 10:21:23.000000000 +0100
2646 +++ head-2012-01-06/drivers/oprofile/event_buffer.h     2011-04-13 11:25:29.000000000 +0200
2647 @@ -30,6 +30,9 @@ void wake_up_buffer_waiter(void);
2648  #define INVALID_COOKIE ~0UL
2649  #define NO_COOKIE 0UL
2650  
2651 +/* Constant used to refer to coordinator domain (Xen) */
2652 +#define COORDINATOR_DOMAIN -1
2653 +
2654  extern const struct file_operations event_buffer_fops;
2655  
2656  /* mutex between sync_cpu_buffers() and the
2657 --- head-2012-01-06.orig/drivers/oprofile/oprof.c       2012-01-06 10:21:23.000000000 +0100
2658 +++ head-2012-01-06/drivers/oprofile/oprof.c    2011-04-13 11:25:29.000000000 +0200
2659 @@ -5,6 +5,10 @@
2660   * @remark Read the file COPYING
2661   *
2662   * @author John Levon <levon@movementarian.org>
2663 + *
2664 + * Modified by Aravind Menon for Xen
2665 + * These modifications are:
2666 + * Copyright (C) 2005 Hewlett-Packard Co.
2667   */
2668  
2669  #include <linux/kernel.h>
2670 @@ -35,6 +39,32 @@ static DEFINE_MUTEX(start_mutex);
2671   */
2672  static int timer = 0;
2673  
2674 +int oprofile_set_active(int active_domains[], unsigned int adomains)
2675 +{
2676 +       int err;
2677 +
2678 +       if (!oprofile_ops.set_active)
2679 +               return -EINVAL;
2680 +
2681 +       mutex_lock(&start_mutex);
2682 +       err = oprofile_ops.set_active(active_domains, adomains);
2683 +       mutex_unlock(&start_mutex);
2684 +       return err;
2685 +}
2686 +
2687 +int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
2688 +{
2689 +       int err;
2690 +
2691 +       if (!oprofile_ops.set_passive)
2692 +               return -EINVAL;
2693 +
2694 +       mutex_lock(&start_mutex);
2695 +       err = oprofile_ops.set_passive(passive_domains, pdomains);
2696 +       mutex_unlock(&start_mutex);
2697 +       return err;
2698 +}
2699 +
2700  int oprofile_setup(void)
2701  {
2702         int err;
2703 --- head-2012-01-06.orig/drivers/oprofile/oprof.h       2012-01-06 10:21:23.000000000 +0100
2704 +++ head-2012-01-06/drivers/oprofile/oprof.h    2011-04-13 11:25:29.000000000 +0200
2705 @@ -40,4 +40,7 @@ void oprofile_timer_exit(void);
2706  int oprofile_set_ulong(unsigned long *addr, unsigned long val);
2707  int oprofile_set_timeout(unsigned long time);
2708  
2709 +int oprofile_set_active(int active_domains[], unsigned int adomains);
2710 +int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
2711 +
2712  #endif /* OPROF_H */
2713 --- head-2012-01-06.orig/drivers/oprofile/oprofile_files.c      2012-01-06 10:21:23.000000000 +0100
2714 +++ head-2012-01-06/drivers/oprofile/oprofile_files.c   2012-01-06 10:31:20.000000000 +0100
2715 @@ -5,11 +5,17 @@
2716   * @remark Read the file COPYING
2717   *
2718   * @author John Levon <levon@movementarian.org>
2719 + *
2720 + * Modified by Aravind Menon for Xen
2721 + * These modifications are:
2722 + * Copyright (C) 2005 Hewlett-Packard Co.
2723   */
2724  
2725  #include <linux/fs.h>
2726  #include <linux/oprofile.h>
2727  #include <linux/jiffies.h>
2728 +#include <asm/uaccess.h>
2729 +#include <linux/ctype.h>
2730  
2731  #include "event_buffer.h"
2732  #include "oprofile_stats.h"
2733 @@ -175,6 +181,195 @@ static const struct file_operations dump
2734         .llseek         = noop_llseek,
2735  };
2736  
2737 +#define TMPBUFSIZE 512
2738 +
2739 +static unsigned int adomains = 0;
2740 +static int active_domains[MAX_OPROF_DOMAINS + 1];
2741 +static DEFINE_MUTEX(adom_mutex);
2742 +
2743 +static ssize_t adomain_write(struct file * file, char const __user * buf,
2744 +                            size_t count, loff_t * offset)
2745 +{
2746 +       char *tmpbuf;
2747 +       char *startp, *endp;
2748 +       int i;
2749 +       unsigned long val;
2750 +       ssize_t retval = count;
2751 +
2752 +       if (*offset)
2753 +               return -EINVAL;
2754 +       if (count > TMPBUFSIZE - 1)
2755 +               return -EINVAL;
2756 +
2757 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2758 +               return -ENOMEM;
2759 +
2760 +       if (copy_from_user(tmpbuf, buf, count)) {
2761 +               kfree(tmpbuf);
2762 +               return -EFAULT;
2763 +       }
2764 +       tmpbuf[count] = 0;
2765 +
2766 +       mutex_lock(&adom_mutex);
2767 +
2768 +       startp = tmpbuf;
2769 +       /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2770 +       for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2771 +               val = simple_strtoul(startp, &endp, 0);
2772 +               if (endp == startp)
2773 +                       break;
2774 +               while (ispunct(*endp) || isspace(*endp))
2775 +                       endp++;
2776 +               active_domains[i] = val;
2777 +               if (active_domains[i] != val)
2778 +                       /* Overflow, force error below */
2779 +                       i = MAX_OPROF_DOMAINS + 1;
2780 +               startp = endp;
2781 +       }
2782 +       /* Force error on trailing junk */
2783 +       adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2784 +
2785 +       kfree(tmpbuf);
2786 +
2787 +       if (adomains > MAX_OPROF_DOMAINS
2788 +           || oprofile_set_active(active_domains, adomains)) {
2789 +               adomains = 0;
2790 +               retval = -EINVAL;
2791 +       }
2792 +
2793 +       mutex_unlock(&adom_mutex);
2794 +       return retval;
2795 +}
2796 +
2797 +static ssize_t adomain_read(struct file * file, char __user * buf,
2798 +                           size_t count, loff_t * offset)
2799 +{
2800 +       char * tmpbuf;
2801 +       size_t len;
2802 +       int i;
2803 +       ssize_t retval;
2804 +
2805 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2806 +               return -ENOMEM;
2807 +
2808 +       mutex_lock(&adom_mutex);
2809 +
2810 +       len = 0;
2811 +       for (i = 0; i < adomains; i++)
2812 +               len += snprintf(tmpbuf + len,
2813 +                               len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2814 +                               "%u ", active_domains[i]);
2815 +       WARN_ON(len > TMPBUFSIZE);
2816 +       if (len != 0 && len <= TMPBUFSIZE)
2817 +               tmpbuf[len-1] = '\n';
2818 +
2819 +       mutex_unlock(&adom_mutex);
2820 +
2821 +       retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2822 +
2823 +       kfree(tmpbuf);
2824 +       return retval;
2825 +}
2826 +
2827 +
2828 +static const struct file_operations active_domain_ops = {
2829 +       .read           = adomain_read,
2830 +       .write          = adomain_write,
2831 +};
2832 +
2833 +static unsigned int pdomains = 0;
2834 +static int passive_domains[MAX_OPROF_DOMAINS];
2835 +static DEFINE_MUTEX(pdom_mutex);
2836 +
2837 +static ssize_t pdomain_write(struct file * file, char const __user * buf,
2838 +                            size_t count, loff_t * offset)
2839 +{
2840 +       char *tmpbuf;
2841 +       char *startp, *endp;
2842 +       int i;
2843 +       unsigned long val;
2844 +       ssize_t retval = count;
2845 +
2846 +       if (*offset)
2847 +               return -EINVAL;
2848 +       if (count > TMPBUFSIZE - 1)
2849 +               return -EINVAL;
2850 +
2851 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2852 +               return -ENOMEM;
2853 +
2854 +       if (copy_from_user(tmpbuf, buf, count)) {
2855 +               kfree(tmpbuf);
2856 +               return -EFAULT;
2857 +       }
2858 +       tmpbuf[count] = 0;
2859 +
2860 +       mutex_lock(&pdom_mutex);
2861 +
2862 +       startp = tmpbuf;
2863 +       /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2864 +       for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2865 +               val = simple_strtoul(startp, &endp, 0);
2866 +               if (endp == startp)
2867 +                       break;
2868 +               while (ispunct(*endp) || isspace(*endp))
2869 +                       endp++;
2870 +               passive_domains[i] = val;
2871 +               if (passive_domains[i] != val)
2872 +                       /* Overflow, force error below */
2873 +                       i = MAX_OPROF_DOMAINS + 1;
2874 +               startp = endp;
2875 +       }
2876 +       /* Force error on trailing junk */
2877 +       pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2878 +
2879 +       kfree(tmpbuf);
2880 +
2881 +       if (pdomains > MAX_OPROF_DOMAINS
2882 +           || oprofile_set_passive(passive_domains, pdomains)) {
2883 +               pdomains = 0;
2884 +               retval = -EINVAL;
2885 +       }
2886 +
2887 +       mutex_unlock(&pdom_mutex);
2888 +       return retval;
2889 +}
2890 +
2891 +static ssize_t pdomain_read(struct file * file, char __user * buf,
2892 +                           size_t count, loff_t * offset)
2893 +{
2894 +       char * tmpbuf;
2895 +       size_t len;
2896 +       int i;
2897 +       ssize_t retval;
2898 +
2899 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2900 +               return -ENOMEM;
2901 +
2902 +       mutex_lock(&pdom_mutex);
2903 +
2904 +       len = 0;
2905 +       for (i = 0; i < pdomains; i++)
2906 +               len += snprintf(tmpbuf + len,
2907 +                               len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2908 +                               "%u ", passive_domains[i]);
2909 +       WARN_ON(len > TMPBUFSIZE);
2910 +       if (len != 0 && len <= TMPBUFSIZE)
2911 +               tmpbuf[len-1] = '\n';
2912 +
2913 +       mutex_unlock(&pdom_mutex);
2914 +
2915 +       retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2916 +
2917 +       kfree(tmpbuf);
2918 +       return retval;
2919 +}
2920 +
2921 +static const struct file_operations passive_domain_ops = {
2922 +       .read           = pdomain_read,
2923 +       .write          = pdomain_write,
2924 +};
2925 +
2926  void oprofile_create_files(struct super_block *sb, struct dentry *root)
2927  {
2928         /* reinitialize default values */
2929 @@ -185,6 +380,8 @@ void oprofile_create_files(struct super_
2930  
2931         oprofilefs_create_file(sb, root, "enable", &enable_fops);
2932         oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
2933 +       oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
2934 +       oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
2935         oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
2936         oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size);
2937         oprofilefs_create_ulong(sb, root, "buffer_watershed", &oprofile_buffer_watershed);
2938 --- head-2012-01-06.orig/fs/aio.c       2012-01-06 10:21:23.000000000 +0100
2939 +++ head-2012-01-06/fs/aio.c    2011-11-16 17:01:54.000000000 +0100
2940 @@ -39,6 +39,11 @@
2941  #include <asm/kmap_types.h>
2942  #include <asm/uaccess.h>
2943  
2944 +#ifdef CONFIG_EPOLL
2945 +#include <linux/poll.h>
2946 +#include <linux/eventpoll.h>
2947 +#endif
2948 +
2949  #if DEBUG > 1
2950  #define dprintk                printk
2951  #else
2952 @@ -1064,6 +1069,11 @@ put_rq:
2953         if (waitqueue_active(&ctx->wait))
2954                 wake_up(&ctx->wait);
2955  
2956 +#ifdef CONFIG_EPOLL
2957 +       if (ctx->file && waitqueue_active(&ctx->poll_wait))
2958 +               wake_up(&ctx->poll_wait);
2959 +#endif
2960 +
2961         spin_unlock_irqrestore(&ctx->ctx_lock, flags);
2962         return ret;
2963  }
2964 @@ -1072,6 +1082,8 @@ EXPORT_SYMBOL(aio_complete);
2965  /* aio_read_evt
2966   *     Pull an event off of the ioctx's event ring.  Returns the number of 
2967   *     events fetched (0 or 1 ;-)
2968 + *     If ent parameter is 0, just returns the number of events that would
2969 + *     be fetched.
2970   *     FIXME: make this use cmpxchg.
2971   *     TODO: make the ringbuffer user mmap()able (requires FIXME).
2972   */
2973 @@ -1094,13 +1106,18 @@ static int aio_read_evt(struct kioctx *i
2974  
2975         head = ring->head % info->nr;
2976         if (head != ring->tail) {
2977 -               struct io_event *evp = aio_ring_event(info, head, KM_USER1);
2978 -               *ent = *evp;
2979 -               head = (head + 1) % info->nr;
2980 -               smp_mb(); /* finish reading the event before updatng the head */
2981 -               ring->head = head;
2982 -               ret = 1;
2983 -               put_aio_ring_event(evp, KM_USER1);
2984 +               if (ent) { /* event requested */
2985 +                       struct io_event *evp =
2986 +                               aio_ring_event(info, head, KM_USER1);
2987 +                       *ent = *evp;
2988 +                       head = (head + 1) % info->nr;
2989 +                       /* finish reading the event before updatng the head */
2990 +                       /* finish reading the event before updating the head */
2991 +                       ring->head = head;
2992 +                       ret = 1;
2993 +                       put_aio_ring_event(evp, KM_USER1);
2994 +               } else /* only need to know availability */
2995 +                       ret = 1;
2996         }
2997         spin_unlock(&info->ring_lock);
2998  
2999 @@ -1285,6 +1302,13 @@ static void io_destroy(struct kioctx *io
3000  
3001         aio_cancel_all(ioctx);
3002         wait_for_all_aios(ioctx);
3003 +#ifdef CONFIG_EPOLL
3004 +       /* forget the poll file, but it's up to the user to close it */
3005 +       if (ioctx->file) {
3006 +               ioctx->file->private_data = 0;
3007 +               ioctx->file = 0;
3008 +       }
3009 +#endif
3010  
3011         /*
3012          * Wake up any waiters.  The setting of ctx->dead must be seen
3013 @@ -1295,6 +1319,67 @@ static void io_destroy(struct kioctx *io
3014         put_ioctx(ioctx);       /* once for the lookup */
3015  }
3016  
3017 +#ifdef CONFIG_EPOLL
3018 +
3019 +static int aio_queue_fd_close(struct inode *inode, struct file *file)
3020 +{
3021 +       struct kioctx *ioctx = file->private_data;
3022 +       if (ioctx) {
3023 +               file->private_data = 0;
3024 +               spin_lock_irq(&ioctx->ctx_lock);
3025 +               ioctx->file = 0;
3026 +               spin_unlock_irq(&ioctx->ctx_lock);
3027 +       }
3028 +       return 0;
3029 +}
3030 +
3031 +static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
3032 +{      unsigned int pollflags = 0;
3033 +       struct kioctx *ioctx = file->private_data;
3034 +
3035 +       if (ioctx) {
3036 +
3037 +               spin_lock_irq(&ioctx->ctx_lock);
3038 +               /* Insert inside our poll wait queue */
3039 +               poll_wait(file, &ioctx->poll_wait, wait);
3040 +
3041 +               /* Check our condition */
3042 +               if (aio_read_evt(ioctx, 0))
3043 +                       pollflags = POLLIN | POLLRDNORM;
3044 +               spin_unlock_irq(&ioctx->ctx_lock);
3045 +       }
3046 +
3047 +       return pollflags;
3048 +}
3049 +
3050 +static const struct file_operations aioq_fops = {
3051 +       .release        = aio_queue_fd_close,
3052 +       .poll           = aio_queue_fd_poll
3053 +};