1 Subject: xen3 common
2 From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 1098:73e47d0fdb10)
3 Patch-mainline: n/a
4 Acked-by: jbeulich@novell.com
5
6 List of files that no longer require modification (and hence were
7 removed from this patch), kept for reference and in case upstream
8 wants to pick up the forward-porting patches:
9 2.6.19/include/linux/skbuff.h
10 2.6.19/net/core/dev.c
11 2.6.19/net/core/skbuff.c
12 2.6.19/net/ipv4/netfilter/nf_nat_proto_tcp.c
13 2.6.19/net/ipv4/netfilter/nf_nat_proto_udp.c
14 2.6.19/net/ipv4/xfrm4_output.c
15 2.6.22/include/linux/sched.h
16 2.6.22/kernel/softlockup.c
17 2.6.22/kernel/timer.c
18 2.6.25/mm/highmem.c
19 2.6.30/include/linux/pci_regs.h
20 2.6.35/kernel/time.c
21
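The ACPI processor hunks below largely follow one recurring pattern:
keep the native code path when Linux owns the processors, and forward
power-management events to the hypervisor when external control is
configured. A condensed illustration of that pattern, lifted from the
acpi_processor_ppc_has_changed() hunk further down (orientation only,
not an additional change):

	#ifdef CONFIG_CPU_FREQ
		return cpufreq_update_policy(pr->id);	/* native case */
	#elif defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
		return processor_notify_external(pr,	/* Xen/dom0 case */
				PROCESSOR_PM_CHANGE, PM_TYPE_PERF);
	#endif
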
22 --- head-2011-07-21.orig/drivers/Makefile       2011-07-21 12:00:01.000000000 +0200
23 +++ head-2011-07-21/drivers/Makefile    2011-06-30 15:36:40.000000000 +0200
24 @@ -45,6 +45,7 @@ obj-$(CONFIG_PARPORT)         += parport/
25  obj-y                          += base/ block/ misc/ mfd/ nfc/
26  obj-$(CONFIG_NUBUS)            += nubus/
27  obj-y                          += macintosh/
28 +obj-$(CONFIG_XEN)              += xen/
29  obj-$(CONFIG_IDE)              += ide/
30  obj-$(CONFIG_SCSI)             += scsi/
31  obj-$(CONFIG_ATA)              += ata/
32 --- head-2011-07-21.orig/drivers/acpi/Makefile  2011-07-21 12:00:01.000000000 +0200
33 +++ head-2011-07-21/drivers/acpi/Makefile       2011-04-13 11:25:29.000000000 +0200
34 @@ -67,6 +67,9 @@ obj-$(CONFIG_ACPI_CUSTOM_METHOD)+= custo
35  processor-y                    := processor_driver.o processor_throttling.o
36  processor-y                    += processor_idle.o processor_thermal.o
37  processor-$(CONFIG_CPU_FREQ)   += processor_perflib.o
38 +ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
39 +processor-objs += processor_perflib.o processor_extcntl.o
40 +endif
41  
42  obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
43  obj-$(CONFIG_ACPI_IPMI)                += acpi_ipmi.o
44 --- head-2011-07-21.orig/drivers/acpi/acpica/hwsleep.c  2011-07-21 12:00:01.000000000 +0200
45 +++ head-2011-07-21/drivers/acpi/acpica/hwsleep.c       2011-04-13 11:25:29.000000000 +0200
46 @@ -236,7 +236,11 @@ acpi_status asmlinkage acpi_enter_sleep_
47         u32 pm1b_control;
48         struct acpi_bit_register_info *sleep_type_reg_info;
49         struct acpi_bit_register_info *sleep_enable_reg_info;
50 +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
51         u32 in_value;
52 +#else
53 +       int err;
54 +#endif
55         struct acpi_object_list arg_list;
56         union acpi_object arg;
57         acpi_status status;
58 @@ -347,6 +351,7 @@ acpi_status asmlinkage acpi_enter_sleep_
59  
60         /* Write #2: Write both SLP_TYP + SLP_EN */
61  
62 +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
63         status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control);
64         if (ACPI_FAILURE(status)) {
65                 return_ACPI_STATUS(status);
66 @@ -386,6 +391,16 @@ acpi_status asmlinkage acpi_enter_sleep_
67                 /* Spin until we wake */
68  
69         } while (!in_value);
70 +#else
71 +       /* PV ACPI just needs to check the hypercall return value */
72 +       err = acpi_notify_hypervisor_state(sleep_state,
73 +                       pm1a_control, pm1b_control);
74 +       if (err) {
75 +               ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
76 +                                 "Hypervisor failure [%d]\n", err));
77 +               return_ACPI_STATUS(AE_ERROR);
78 +       }
79 +#endif
80  
81         return_ACPI_STATUS(AE_OK);
82  }
83 --- head-2011-07-21.orig/drivers/acpi/processor_driver.c        2011-07-21 12:00:01.000000000 +0200
84 +++ head-2011-07-21/drivers/acpi/processor_driver.c     2011-04-13 11:25:29.000000000 +0200
85 @@ -325,7 +325,8 @@ static int acpi_processor_get_info(struc
86          */
87         if (pr->id == -1) {
88                 if (ACPI_FAILURE
89 -                   (acpi_processor_hotadd_init(pr->handle, &pr->id))) {
90 +                   (acpi_processor_hotadd_init(pr->handle, &pr->id)) &&
91 +                   !processor_cntl_external()) {
92                         return -ENODEV;
93                 }
94         }
95 @@ -376,7 +377,11 @@ static int acpi_processor_get_info(struc
96         return 0;
97  }
98  
99 +#ifndef CONFIG_XEN
100  static DEFINE_PER_CPU(void *, processor_device_array);
101 +#else
102 +static void *processor_device_array[NR_ACPI_CPUS];
103 +#endif
104  
105  static void acpi_processor_notify(struct acpi_device *device, u32 event)
106  {
107 @@ -462,8 +467,11 @@ static int __cpuinit acpi_processor_add(
108         strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS);
109         device->driver_data = pr;
110  
111 +       processor_extcntl_init();
112 +
113         result = acpi_processor_get_info(device);
114 -       if (result) {
115 +       if (result ||
116 +           ((pr->id == -1) && !processor_cntl_external())) {
117                 /* Processor is physically not present */
118                 return 0;
119         }
120 @@ -473,23 +481,36 @@ static int __cpuinit acpi_processor_add(
121                 return 0;
122  #endif
123  
124 -       BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0));
125 +       BUG_ON(!processor_cntl_external() &&
126 +              ((pr->id >= nr_cpu_ids) || (pr->id < 0)));
127  
128         /*
129          * Buggy BIOS check
130          * ACPI id of processors can be reported wrongly by the BIOS.
131          * Don't trust it blindly
132          */
133 +#ifndef CONFIG_XEN
134         if (per_cpu(processor_device_array, pr->id) != NULL &&
135             per_cpu(processor_device_array, pr->id) != device) {
136 +#else
137 +       BUG_ON(pr->acpi_id >= NR_ACPI_CPUS);
138 +       if (processor_device_array[pr->acpi_id] != NULL &&
139 +           processor_device_array[pr->acpi_id] != device) {
140 +#endif
141                 printk(KERN_WARNING "BIOS reported wrong ACPI id "
142                         "for the processor\n");
143                 result = -ENODEV;
144                 goto err_free_cpumask;
145         }
146 +#ifndef CONFIG_XEN
147         per_cpu(processor_device_array, pr->id) = device;
148  
149         per_cpu(processors, pr->id) = pr;
150 +#else
151 +       processor_device_array[pr->acpi_id] = device;
152 +       if (pr->id != -1)
153 +               per_cpu(processors, pr->id) = pr;
154 +#endif
155  
156         sysdev = get_cpu_sysdev(pr->id);
157         if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
158 @@ -497,16 +518,28 @@ static int __cpuinit acpi_processor_add(
159                 goto err_free_cpumask;
160         }
161  
162 -#ifdef CONFIG_CPU_FREQ
163 +#if defined(CONFIG_CPU_FREQ) || defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
164         acpi_processor_ppc_has_changed(pr, 0);
165  #endif
166 -       acpi_processor_get_throttling_info(pr);
167 -       acpi_processor_get_limit_info(pr);
168  
169 +       /*
170 +        * pr->id may be -1 when processor_cntl_external() is enabled.
171 +        * The throttling and thermal modules don't support this case.
172 +        * For now Tx only works when the dom0 vcpu count equals the pcpu
173 +        * count, as we give control to dom0.
174 +        */
175 +       if (pr->id != -1) {
176 +               acpi_processor_get_throttling_info(pr);
177 +               acpi_processor_get_limit_info(pr);
178 +       }
179  
180         if (cpuidle_get_driver() == &acpi_idle_driver)
181                 acpi_processor_power_init(pr, device);
182  
183 +       result = processor_extcntl_prepare(pr);
184 +       if (result)
185 +               goto end;
186 +
187         pr->cdev = thermal_cooling_device_register("Processor", device,
188                                                 &processor_cooling_ops);
189         if (IS_ERR(pr->cdev)) {
190 @@ -556,7 +589,7 @@ static int acpi_processor_remove(struct 
191  
192         pr = acpi_driver_data(device);
193  
194 -       if (pr->id >= nr_cpu_ids)
195 +       if (!processor_cntl_external() && pr->id >= nr_cpu_ids)
196                 goto free;
197  
198         if (type == ACPI_BUS_REMOVAL_EJECT) {
199 @@ -575,8 +608,14 @@ static int acpi_processor_remove(struct 
200                 pr->cdev = NULL;
201         }
202  
203 +#ifndef CONFIG_XEN
204         per_cpu(processors, pr->id) = NULL;
205         per_cpu(processor_device_array, pr->id) = NULL;
206 +#else
207 +       if (pr->id != -1)
208 +               per_cpu(processors, pr->id) = NULL;
209 +       processor_device_array[pr->acpi_id] = NULL;
210 +#endif
211  
212  free:
213         free_cpumask_var(pr->throttling.shared_cpu_map);
214 @@ -632,6 +671,10 @@ int acpi_processor_device_add(acpi_handl
215                 return -ENODEV;
216         }
217  
218 +       if (processor_cntl_external() && acpi_driver_data(*device))
219 +               processor_notify_external(acpi_driver_data(*device),
220 +                       PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD);
221 +
222         return 0;
223  }
224  
225 @@ -661,6 +704,10 @@ static void acpi_processor_hotplug_notif
226                                             "Unable to add the device\n");
227                         break;
228                 }
229 +               pr = acpi_driver_data(device);
230 +               if (processor_cntl_external() && pr)
231 +                       processor_notify_external(pr,
232 +                                       PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD);
233                 break;
234         case ACPI_NOTIFY_EJECT_REQUEST:
235                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
236 @@ -677,6 +724,9 @@ static void acpi_processor_hotplug_notif
237                                     "Driver data is NULL, dropping EJECT\n");
238                         return;
239                 }
240 +               if (processor_cntl_external())
241 +                       processor_notify_external(pr, PROCESSOR_HOTPLUG,
242 +                                               HOTPLUG_TYPE_REMOVE);
243                 break;
244         default:
245                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
246 @@ -741,6 +791,11 @@ static acpi_status acpi_processor_hotadd
247  
248  static int acpi_processor_handle_eject(struct acpi_processor *pr)
249  {
250 +#ifdef CONFIG_XEN
251 +       if (pr->id == -1)
252 +               return (0);
253 +#endif
254 +
255         if (cpu_online(pr->id))
256                 cpu_down(pr->id);
257  
258 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
259 +++ head-2011-07-21/drivers/acpi/processor_extcntl.c    2011-01-31 14:53:38.000000000 +0100
260 @@ -0,0 +1,241 @@
261 +/*
262 + * processor_extcntl.c - channel to external control logic
263 + *
264 + *  Copyright (C) 2008, Intel corporation
265 + *
266 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
267 + *
268 + *  This program is free software; you can redistribute it and/or modify
269 + *  it under the terms of the GNU General Public License as published by
270 + *  the Free Software Foundation; either version 2 of the License, or (at
271 + *  your option) any later version.
272 + *
273 + *  This program is distributed in the hope that it will be useful, but
274 + *  WITHOUT ANY WARRANTY; without even the implied warranty of
275 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
276 + *  General Public License for more details.
277 + *
278 + *  You should have received a copy of the GNU General Public License along
279 + *  with this program; if not, write to the Free Software Foundation, Inc.,
280 + *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
281 + *
282 + */
283 +
284 +#include <linux/kernel.h>
285 +#include <linux/init.h>
286 +#include <linux/types.h>
287 +#include <linux/acpi.h>
288 +#include <linux/pm.h>
289 +#include <linux/cpu.h>
290 +
291 +#include <acpi/processor.h>
292 +
293 +#define ACPI_PROCESSOR_COMPONENT        0x01000000
294 +#define ACPI_PROCESSOR_CLASS            "processor"
295 +#define ACPI_PROCESSOR_DRIVER_NAME      "ACPI Processor Driver"
296 +#define _COMPONENT              ACPI_PROCESSOR_COMPONENT
297 +ACPI_MODULE_NAME("acpi_processor")
298 +
299 +static int processor_extcntl_parse_csd(struct acpi_processor *pr);
300 +static int processor_extcntl_get_performance(struct acpi_processor *pr);
301 +/*
302 + * External processor control logic may register its own set of ops
303 + * to receive ACPI-related notifications. One example is a VMM.
304 + */
305 +const struct processor_extcntl_ops *processor_extcntl_ops;
306 +EXPORT_SYMBOL(processor_extcntl_ops);
307 +
308 +static int processor_notify_smm(void)
309 +{
310 +       acpi_status status;
311 +       static int is_done = 0;
312 +
313 +       /* only need to successfully notify the BIOS once */
314 +       /* avoid double notification, which may lead to unexpected results */
315 +       if (is_done)
316 +               return 0;
317 +
318 +       /* Can't write pstate_cnt to smi_cmd if either value is zero */
319 +       if ((!acpi_fadt.smi_cmd) || (!acpi_fadt.pstate_cnt)) {
320 +               ACPI_DEBUG_PRINT((ACPI_DB_INFO,"No SMI port or pstate_cnt\n"));
321 +               return 0;
322 +       }
323 +
324 +       ACPI_DEBUG_PRINT((ACPI_DB_INFO,
325 +               "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n",
326 +               acpi_fadt.pstate_cnt, acpi_fadt.smi_cmd));
327 +
328 +       /* FADT v1 doesn't support pstate_cnt, but many BIOS vendors use
329 +        * it anyway, so we need to support it... */
330 +       if (acpi_fadt_is_v1) {
331 +               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
332 +                       "Using v1.0 FADT reserved value for pstate_cnt\n"));
333 +       }
334 +
335 +       status = acpi_os_write_port(acpi_fadt.smi_cmd,
336 +                                   (u32) acpi_fadt.pstate_cnt, 8);
337 +       if (ACPI_FAILURE(status))
338 +               return status;
339 +
340 +       is_done = 1;
341 +
342 +       return 0;
343 +}
344 +
345 +int processor_notify_external(struct acpi_processor *pr, int event, int type)
346 +{
347 +       int ret = -EINVAL;
348 +
349 +       if (!processor_cntl_external())
350 +               return -EINVAL;
351 +
352 +       switch (event) {
353 +       case PROCESSOR_PM_INIT:
354 +       case PROCESSOR_PM_CHANGE:
355 +               if ((type >= PM_TYPE_MAX) ||
356 +                       !processor_extcntl_ops->pm_ops[type])
357 +                       break;
358 +
359 +               ret = processor_extcntl_ops->pm_ops[type](pr, event);
360 +               break;
361 +       case PROCESSOR_HOTPLUG:
362 +               if (processor_extcntl_ops->hotplug)
363 +                       ret = processor_extcntl_ops->hotplug(pr, type);
364 +               break;
365 +       default:
366 +               printk(KERN_ERR "Unsupported processor event %d.\n", event);
367 +               break;
368 +       }
369 +
370 +       return ret;
371 +}
372 +
373 +/*
374 + * External control logic can decide to take over all or part of the
375 + * physical processor control. Take a VMM for example: physical
376 + * processors are owned by the VMM, so existence information such as
377 + * hotplug must always be reported to it. The same holds for processor
378 + * idle state, which is also necessarily controlled by the VMM. For
379 + * other control bits like performance/throttle states, the VMM may
380 + * choose whether to take control according to its own policy.
381 + */
382 +void processor_extcntl_init(void)
383 +{
384 +       if (!processor_extcntl_ops)
385 +               arch_acpi_processor_init_extcntl(&processor_extcntl_ops);
386 +}
387 +
388 +/*
389 + * This is called from ACPI processor init, and is meant to hold
390 + * some tricky housekeeping jobs needed by the external control model.
391 + * For example, dependency parsing stubs for idle and performance
392 + * states live here, since that information may not be available
393 + * once split off from dom0 control logic such as the cpufreq driver.
394 + */
395 +int processor_extcntl_prepare(struct acpi_processor *pr)
396 +{
397 +       /* parse cstate dependency information */
398 +       if (processor_pm_external())
399 +               processor_extcntl_parse_csd(pr);
400 +
401 +       /* Initialize performance states */
402 +       if (processor_pmperf_external())
403 +               processor_extcntl_get_performance(pr);
404 +
405 +       return 0;
406 +}
407 +
408 +/*
409 + * Currently no _CSD is implemented, which is why the existing ACPI
410 + * code doesn't parse _CSD at all. But to keep the interface to the
411 + * external control logic complete, we put a placeholder here for
412 + * future compatibility.
413 + */
414 +static int processor_extcntl_parse_csd(struct acpi_processor *pr)
415 +{
416 +       int i;
417 +
418 +       for (i = 0; i < pr->power.count; i++) {
419 +               if (!pr->power.states[i].valid)
420 +                       continue;
421 +
422 +               /* No dependency by default */
423 +               pr->power.states[i].domain_info = NULL;
424 +               pr->power.states[i].csd_count = 0;
425 +       }
426 +
427 +       return 0;
428 +}
429 +
430 +/*
431 + * The existing ACPI module does parse performance states at some
432 + * point, when the acpi-cpufreq driver is loaded; however, we'd like
433 + * to disable that to avoid conflicts with the external control
434 + * logic. So we have to collect the raw performance information here
435 + * when the ACPI processor object is found and started.
436 + */
437 +static int processor_extcntl_get_performance(struct acpi_processor *pr)
438 +{
439 +       int ret;
440 +       struct acpi_processor_performance *perf;
441 +       struct acpi_psd_package *pdomain;
442 +
443 +       if (pr->performance)
444 +               return -EBUSY;
445 +
446 +       perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL);
447 +       if (!perf)
448 +               return -ENOMEM;
449 +
450 +       pr->performance = perf;
451 +       /* Get basic performance state information */
452 +       ret = acpi_processor_get_performance_info(pr);
453 +       if (ret < 0)
454 +               goto err_out;
455 +
456 +       /*
457 +        * Here we need to retrieve performance dependency information
458 +        * from the _PSD object. The existing interface is not used
459 +        * because it sticks to Linux cpu ids to construct its bitmaps,
460 +        * whereas we want to decouple ACPI processor objects from the
461 +        * Linux cpu id logic. For example, even when Linux is configured
462 +        * as UP, we still want to report all ACPI processor objects to
463 +        * the external logic. In this case, it's preferable to use the
464 +        * ACPI ID instead.
465 +        */
466 +       pdomain = &pr->performance->domain_info;
467 +       pdomain->num_processors = 0;
468 +       ret = acpi_processor_get_psd(pr);
469 +       if (ret < 0) {
470 +               /*
471 +                * _PSD is optional - assume no coordination if absent (or
472 +                * broken), matching native kernels' behavior.
473 +                */
474 +               pdomain->num_entries = ACPI_PSD_REV0_ENTRIES;
475 +               pdomain->revision = ACPI_PSD_REV0_REVISION;
476 +               pdomain->domain = pr->acpi_id;
477 +               pdomain->coord_type = DOMAIN_COORD_TYPE_SW_ALL;
478 +               pdomain->num_processors = 1;
479 +       }
480 +
481 +       /* Some sanity check */
482 +       if ((pdomain->revision != ACPI_PSD_REV0_REVISION) ||
483 +           (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) ||
484 +           ((pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL) &&
485 +            (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY) &&
486 +            (pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL))) {
487 +               ret = -EINVAL;
488 +               goto err_out;
489 +       }
490 +
491 +       /* Last step is to notify BIOS that external logic exists */
492 +       processor_notify_smm();
493 +
494 +       processor_notify_external(pr, PROCESSOR_PM_INIT, PM_TYPE_PERF);
495 +
496 +       return 0;
497 +err_out:
498 +       pr->performance = NULL;
499 +       kfree(perf);
500 +       return ret;
501 +}
502 --- head-2011-07-21.orig/drivers/acpi/processor_idle.c  2011-07-21 12:00:01.000000000 +0200
503 +++ head-2011-07-21/drivers/acpi/processor_idle.c       2011-04-13 11:25:29.000000000 +0200
504 @@ -458,7 +458,8 @@ static int acpi_processor_get_power_info
505                                  */
506                                 cx.entry_method = ACPI_CSTATE_HALT;
507                                 snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
508 -                       } else {
509 +                       /* This doesn't apply to the external control case */
510 +                       } else if (!processor_pm_external()) {
511                                 continue;
512                         }
513                         if (cx.type == ACPI_STATE_C1 &&
514 @@ -497,6 +498,12 @@ static int acpi_processor_get_power_info
515  
516                 cx.power = obj->integer.value;
517  
518 +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
519 +               /* cache control methods to notify external logic */
520 +               if (processor_pm_external())
521 +                       memcpy(&cx.reg, reg, sizeof(*reg));
522 +#endif
523 +
524                 current_count++;
525                 memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));
526  
527 @@ -1130,6 +1137,11 @@ int __cpuinit acpi_processor_power_init(
528                 if (cpuidle_register_device(&pr->power.dev))
529                         return -EIO;
530         }
531 +
532 +       if (processor_pm_external())
533 +               processor_notify_external(pr,
534 +                       PROCESSOR_PM_INIT, PM_TYPE_IDLE);
535 +
536         return 0;
537  }
538  
539 --- head-2011-07-21.orig/drivers/acpi/processor_perflib.c       2011-07-21 12:00:01.000000000 +0200
540 +++ head-2011-07-21/drivers/acpi/processor_perflib.c    2011-06-30 15:36:54.000000000 +0200
541 @@ -75,6 +75,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the fre
542  
543  static int acpi_processor_ppc_status;
544  
545 +#ifdef CONFIG_CPU_FREQ
546  static int acpi_processor_ppc_notifier(struct notifier_block *nb,
547                                        unsigned long event, void *data)
548  {
549 @@ -117,6 +118,7 @@ static int acpi_processor_ppc_notifier(s
550  static struct notifier_block acpi_ppc_notifier_block = {
551         .notifier_call = acpi_processor_ppc_notifier,
552  };
553 +#endif /* CONFIG_CPU_FREQ */
554  
555  static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
556  {
557 @@ -205,7 +207,12 @@ int acpi_processor_ppc_has_changed(struc
558         if (ret < 0)
559                 return (ret);
560         else
561 +#ifdef CONFIG_CPU_FREQ
562                 return cpufreq_update_policy(pr->id);
563 +#elif defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
564 +               return processor_notify_external(pr,
565 +                               PROCESSOR_PM_CHANGE, PM_TYPE_PERF);
566 +#endif
567  }
568  
569  int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
570 @@ -221,6 +228,7 @@ int acpi_processor_get_bios_limit(int cp
571  }
572  EXPORT_SYMBOL(acpi_processor_get_bios_limit);
573  
574 +#ifdef CONFIG_CPU_FREQ
575  void acpi_processor_ppc_init(void)
576  {
577         if (!cpufreq_register_notifier
578 @@ -239,6 +247,7 @@ void acpi_processor_ppc_exit(void)
579  
580         acpi_processor_ppc_status &= ~PPC_REGISTERED;
581  }
582 +#endif /* CONFIG_CPU_FREQ */
583  
584  static int acpi_processor_get_performance_control(struct acpi_processor *pr)
585  {
586 @@ -386,7 +395,10 @@ static int acpi_processor_get_performanc
587         return result;
588  }
589  
590 -static int acpi_processor_get_performance_info(struct acpi_processor *pr)
591 +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
592 +static
593 +#endif
594 +int acpi_processor_get_performance_info(struct acpi_processor *pr)
595  {
596         int result = 0;
597         acpi_status status = AE_OK;
598 @@ -431,6 +443,7 @@ static int acpi_processor_get_performanc
599         return result;
600  }
601  
602 +#ifdef CONFIG_CPU_FREQ
603  int acpi_processor_notify_smm(struct module *calling_module)
604  {
605         acpi_status status;
606 @@ -491,8 +504,12 @@ int acpi_processor_notify_smm(struct mod
607  }
608  
609  EXPORT_SYMBOL(acpi_processor_notify_smm);
610 +#endif /* CONFIG_CPU_FREQ */
611  
612 -static int acpi_processor_get_psd(struct acpi_processor        *pr)
613 +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
614 +static
615 +#endif
616 +int acpi_processor_get_psd(struct acpi_processor *pr)
617  {
618         int result = 0;
619         acpi_status status = AE_OK;
620 --- head-2011-07-21.orig/drivers/acpi/sleep.c   2011-07-21 12:00:01.000000000 +0200
621 +++ head-2011-07-21/drivers/acpi/sleep.c        2011-04-13 11:25:29.000000000 +0200
622 @@ -61,6 +61,7 @@ static struct notifier_block tts_notifie
623  static int acpi_sleep_prepare(u32 acpi_state)
624  {
625  #ifdef CONFIG_ACPI_SLEEP
626 +#ifndef CONFIG_ACPI_PV_SLEEP
627         /* do we have a wakeup address for S2 and S3? */
628         if (acpi_state == ACPI_STATE_S3) {
629                 if (!acpi_wakeup_address) {
630 @@ -70,6 +71,7 @@ static int acpi_sleep_prepare(u32 acpi_s
631                                 (acpi_physical_address)acpi_wakeup_address);
632  
633         }
634 +#endif
635         ACPI_FLUSH_CPU_CACHE();
636  #endif
637         printk(KERN_INFO PREFIX "Preparing to enter system sleep state S%d\n",
638 --- head-2011-07-21.orig/drivers/char/agp/intel-gtt.c   2011-07-21 12:00:01.000000000 +0200
639 +++ head-2011-07-21/drivers/char/agp/intel-gtt.c        2011-04-13 11:25:29.000000000 +0200
640 @@ -147,8 +147,19 @@ static struct page *i8xx_alloc_pages(voi
641         if (page == NULL)
642                 return NULL;
643  
644 +#ifdef CONFIG_XEN
645 +       if (xen_create_contiguous_region((unsigned long)page_address(page), 2, 32)) {
646 +               __free_pages(page, 2);
647 +               return NULL;
648 +       }
649 +#endif
650 +
651         if (set_pages_uc(page, 4) < 0) {
652                 set_pages_wb(page, 4);
653 +#ifdef CONFIG_XEN
654 +               xen_destroy_contiguous_region((unsigned long)page_address(page),
655 +                                             2);
656 +#endif
657                 __free_pages(page, 2);
658                 return NULL;
659         }
660 @@ -163,6 +174,9 @@ static void i8xx_destroy_pages(struct pa
661                 return;
662  
663         set_pages_wb(page, 4);
664 +#ifdef CONFIG_XEN
665 +       xen_destroy_contiguous_region((unsigned long)page_address(page), 2);
666 +#endif
667         put_page(page);
668         __free_pages(page, 2);
669         atomic_dec(&agp_bridge->current_memory_agp);
670 --- head-2011-07-21.orig/drivers/char/mem.c     2011-07-21 12:00:01.000000000 +0200
671 +++ head-2011-07-21/drivers/char/mem.c  2011-05-23 10:56:58.000000000 +0200
672 @@ -86,6 +86,7 @@ void __weak unxlate_dev_mem_ptr(unsigned
673  {
674  }
675  
676 +#ifndef ARCH_HAS_DEV_MEM
677  /*
678   * This funcion reads the *physical* memory. The f_pos points directly to the
679   * memory location.
680 @@ -208,6 +209,7 @@ static ssize_t write_mem(struct file *fi
681         *ppos += written;
682         return written;
683  }
684 +#endif
685  
686  int __weak phys_mem_access_prot_allowed(struct file *file,
687         unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
688 @@ -334,6 +336,9 @@ static int mmap_mem(struct file *file, s
689  static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
690  {
691         unsigned long pfn;
692 +#ifdef CONFIG_XEN
693 +       unsigned long i, count;
694 +#endif
695  
696         /* Turn a kernel-virtual address into a physical page frame */
697         pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
698 @@ -348,6 +353,13 @@ static int mmap_kmem(struct file *file, 
699         if (!pfn_valid(pfn))
700                 return -EIO;
701  
702 +#ifdef CONFIG_XEN
703 +       count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
704 +       for (i = 0; i < count; i++)
705 +               if ((pfn + i) != mfn_to_local_pfn(pfn_to_mfn(pfn + i)))
706 +                       return -EIO;
707 +#endif
708 +
709         vma->vm_pgoff = pfn;
710         return mmap_mem(file, vma);
711  }
712 @@ -739,6 +751,7 @@ static int open_port(struct inode * inod
713  #define open_kmem      open_mem
714  #define open_oldmem    open_mem
715  
716 +#ifndef ARCH_HAS_DEV_MEM
717  static const struct file_operations mem_fops = {
718         .llseek         = memory_lseek,
719         .read           = read_mem,
720 @@ -747,6 +760,9 @@ static const struct file_operations mem_
721         .open           = open_mem,
722         .get_unmapped_area = get_unmapped_area_mem,
723  };
724 +#else
725 +extern const struct file_operations mem_fops;
726 +#endif
727  
728  #ifdef CONFIG_DEVKMEM
729  static const struct file_operations kmem_fops = {
730 --- head-2011-07-21.orig/drivers/char/tpm/Makefile      2011-07-21 12:00:01.000000000 +0200
731 +++ head-2011-07-21/drivers/char/tpm/Makefile   2011-04-13 11:25:29.000000000 +0200
732 @@ -9,3 +9,5 @@ obj-$(CONFIG_TCG_TIS) += tpm_tis.o
733  obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
734  obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
735  obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
736 +obj-$(CONFIG_TCG_XEN) += tpm_xenu.o
737 +tpm_xenu-y = tpm_xen.o tpm_vtpm.o
738 --- head-2011-07-21.orig/drivers/char/tpm/tpm.h 2011-07-21 12:00:01.000000000 +0200
739 +++ head-2011-07-21/drivers/char/tpm/tpm.h      2011-04-13 11:25:29.000000000 +0200
740 @@ -108,6 +108,9 @@ struct tpm_chip {
741         struct dentry **bios_dir;
742  
743         struct list_head list;
744 +#ifdef CONFIG_XEN
745 +       void *priv;
746 +#endif
747         void (*release) (struct device *);
748  };
749  
750 @@ -272,6 +275,18 @@ struct tpm_cmd_t {
751  
752  ssize_t        tpm_getcap(struct device *, __be32, cap_t *, const char *);
753  
754 +#ifdef CONFIG_XEN
755 +static inline void *chip_get_private(const struct tpm_chip *chip)
756 +{
757 +       return chip->priv;
758 +}
759 +
760 +static inline void chip_set_private(struct tpm_chip *chip, void *priv)
761 +{
762 +       chip->priv = priv;
763 +}
764 +#endif
765 +
766  extern void tpm_get_timeouts(struct tpm_chip *);
767  extern void tpm_gen_interrupt(struct tpm_chip *);
768  extern void tpm_continue_selftest(struct tpm_chip *);
769 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
770 +++ head-2011-07-21/drivers/char/tpm/tpm_vtpm.c 2011-01-31 14:53:38.000000000 +0100
771 @@ -0,0 +1,542 @@
772 +/*
773 + * Copyright (C) 2006 IBM Corporation
774 + *
775 + * Authors:
776 + * Stefan Berger <stefanb@us.ibm.com>
777 + *
778 + * Generic device driver part for device drivers in a virtualized
779 + * environment.
780 + *
781 + * This program is free software; you can redistribute it and/or
782 + * modify it under the terms of the GNU General Public License as
783 + * published by the Free Software Foundation, version 2 of the
784 + * License.
785 + *
786 + */
787 +
788 +#include <asm/uaccess.h>
789 +#include <linux/list.h>
790 +#include <linux/device.h>
791 +#include <linux/interrupt.h>
792 +#include <linux/platform_device.h>
793 +#include "tpm.h"
794 +#include "tpm_vtpm.h"
795 +
796 +/* read status bits */
797 +enum {
798 +       STATUS_BUSY = 0x01,
799 +       STATUS_DATA_AVAIL = 0x02,
800 +       STATUS_READY = 0x04
801 +};
802 +
803 +struct transmission {
804 +       struct list_head next;
805 +
806 +       unsigned char *request;
807 +       size_t  request_len;
808 +       size_t  request_buflen;
809 +
810 +       unsigned char *response;
811 +       size_t  response_len;
812 +       size_t  response_buflen;
813 +
814 +       unsigned int flags;
815 +};
816 +
817 +enum {
818 +       TRANSMISSION_FLAG_WAS_QUEUED = 0x1
819 +};
820 +
821 +
822 +enum {
823 +       DATAEX_FLAG_QUEUED_ONLY = 0x1
824 +};
825 +
826 +
827 +/* local variables */
828 +
829 +/* local function prototypes */
830 +static int _vtpm_send_queued(struct tpm_chip *chip);
831 +
832 +
833 +/* =============================================================
834 + * Some utility functions
835 + * =============================================================
836 + */
837 +static void vtpm_state_init(struct vtpm_state *vtpms)
838 +{
839 +       vtpms->current_request = NULL;
840 +       spin_lock_init(&vtpms->req_list_lock);
841 +       init_waitqueue_head(&vtpms->req_wait_queue);
842 +       INIT_LIST_HEAD(&vtpms->queued_requests);
843 +
844 +       vtpms->current_response = NULL;
845 +       spin_lock_init(&vtpms->resp_list_lock);
846 +       init_waitqueue_head(&vtpms->resp_wait_queue);
847 +
848 +       vtpms->disconnect_time = jiffies;
849 +}
850 +
851 +
852 +static inline struct transmission *transmission_alloc(void)
853 +{
854 +       return kzalloc(sizeof(struct transmission), GFP_ATOMIC);
855 +}
856 +
857 +static unsigned char *
858 +transmission_set_req_buffer(struct transmission *t,
859 +                            unsigned char *buffer, size_t len)
860 +{
861 +       if (t->request_buflen < len) {
862 +               kfree(t->request);
863 +               t->request = kmalloc(len, GFP_KERNEL);
864 +               if (!t->request) {
865 +                       t->request_buflen = 0;
866 +                       return NULL;
867 +               }
868 +               t->request_buflen = len;
869 +       }
870 +
871 +       memcpy(t->request, buffer, len);
872 +       t->request_len = len;
873 +
874 +       return t->request;
875 +}
876 +
877 +static unsigned char *
878 +transmission_set_res_buffer(struct transmission *t,
879 +                            const unsigned char *buffer, size_t len)
880 +{
881 +       if (t->response_buflen < len) {
882 +               kfree(t->response);
883 +               t->response = kmalloc(len, GFP_ATOMIC);
884 +               if (!t->response) {
885 +                       t->response_buflen = 0;
886 +                       return NULL;
887 +               }
888 +               t->response_buflen = len;
889 +       }
890 +
891 +       memcpy(t->response, buffer, len);
892 +       t->response_len = len;
893 +
894 +       return t->response;
895 +}
896 +
897 +static inline void transmission_free(struct transmission *t)
898 +{
899 +       kfree(t->request);
900 +       kfree(t->response);
901 +       kfree(t);
902 +}
903 +
904 +/* =============================================================
905 + * Interface with the lower layer driver
906 + * =============================================================
907 + */
908 +/*
909 + * Lower layer uses this function to make a response available.
910 + */
911 +int vtpm_vd_recv(const struct tpm_chip *chip,
912 +                 const unsigned char *buffer, size_t count,
913 +                 void *ptr)
914 +{
915 +       unsigned long flags;
916 +       int ret_size = 0;
917 +       struct transmission *t;
918 +       struct vtpm_state *vtpms;
919 +
920 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
921 +
922 +       /*
923 +        * The request list must contain only one request,
924 +        * and that element must be the one that was passed
925 +        * in from the front-end.
926 +        */
927 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
928 +       if (vtpms->current_request != ptr) {
929 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
930 +               return 0;
931 +       }
932 +
933 +       if ((t = vtpms->current_request)) {
934 +               transmission_free(t);
935 +               vtpms->current_request = NULL;
936 +       }
937 +
938 +       t = transmission_alloc();
939 +       if (t) {
940 +               if (!transmission_set_res_buffer(t, buffer, count)) {
941 +                       transmission_free(t);
942 +                       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
943 +                       return -ENOMEM;
944 +               }
945 +               ret_size = count;
946 +               vtpms->current_response = t;
947 +               wake_up_interruptible(&vtpms->resp_wait_queue);
948 +       }
949 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
950 +
951 +       return ret_size;
952 +}
953 +
954 +
955 +/*
956 + * Lower layer indicates its status (connected/disconnected)
957 + */
958 +void vtpm_vd_status(const struct tpm_chip *chip, u8 vd_status)
959 +{
960 +       struct vtpm_state *vtpms;
961 +
962 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
963 +
964 +       vtpms->vd_status = vd_status;
965 +       if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
966 +               vtpms->disconnect_time = jiffies;
967 +       }
968 +}
969 +
970 +/* =============================================================
971 + * Interface with the generic TPM driver
972 + * =============================================================
973 + */
974 +static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
975 +{
976 +       int rc = 0;
977 +       unsigned long flags;
978 +       struct vtpm_state *vtpms;
979 +
980 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
981 +
982 +       /*
983 +        * Check if the previous operation only queued the command.
984 +        * In this case there won't be a response, so just return
985 +        * from here and reset that flag. In any other case a
986 +        * response should arrive from the back-end.
987 +        */
988 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
989 +       if ((vtpms->flags & DATAEX_FLAG_QUEUED_ONLY) != 0) {
990 +               vtpms->flags &= ~DATAEX_FLAG_QUEUED_ONLY;
991 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
992 +               /*
993 +                * The first few commands (measurements) must be
994 +                * queued since it might not be possible to talk to the
995 +                * TPM, yet.
996 +                * Return a response of up to 30 '0's.
997 +                */
998 +
999 +               count = min_t(size_t, count, 30);
1000 +               memset(buf, 0x0, count);
1001 +               return count;
1002 +       }
1003 +       /*
1004 +        * Check whether something is in the responselist and if
1005 +        * there's nothing in the list wait for something to appear.
1006 +        */
1007 +
1008 +       if (!vtpms->current_response) {
1009 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1010 +               interruptible_sleep_on_timeout(&vtpms->resp_wait_queue,
1011 +                                              1000);
1012 +               spin_lock_irqsave(&vtpms->resp_list_lock ,flags);
1013 +       }
1014 +
1015 +       if (vtpms->current_response) {
1016 +               struct transmission *t = vtpms->current_response;
1017 +               vtpms->current_response = NULL;
1018 +               rc = min(count, t->response_len);
1019 +               memcpy(buf, t->response, rc);
1020 +               transmission_free(t);
1021 +       }
1022 +
1023 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1024 +       return rc;
1025 +}
1026 +
1027 +static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
1028 +{
1029 +       int rc = 0;
1030 +       unsigned long flags;
1031 +       struct transmission *t = transmission_alloc();
1032 +       struct vtpm_state *vtpms;
1033 +
1034 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1035 +
1036 +       if (!t)
1037 +               return -ENOMEM;
1038 +       /*
1039 +        * If there's a current request, it must be the
1040 +        * previous request that has timed out.
1041 +        */
1042 +       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1043 +       if (vtpms->current_request != NULL) {
1044 +               printk("WARNING: Sending although there is a request outstanding.\n"
1045 +                      "         Previous request must have timed out.\n");
1046 +               transmission_free(vtpms->current_request);
1047 +               vtpms->current_request = NULL;
1048 +       }
1049 +       spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1050 +
1051 +       /*
1052 +        * Queue the packet if the driver below is not
1053 +        * ready yet, or if there is any packet already
1054 +        * in the queue.
1055 +        * If the driver below is ready, unqueue all
1056 +        * packets first before sending our current
1057 +        * packet.
1058 +        * For each unqueued packet, except for the
1059 +        * last (=current) packet, call the function
1060 +        * vtpm_recv to wait for the response to come
1061 +        * back.
1062 +        */
1063 +       if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
1064 +               if (time_after(jiffies,
1065 +                              vtpms->disconnect_time + HZ * 10)) {
1066 +                       rc = -ENOENT;
1067 +               } else {
1068 +                       goto queue_it;
1069 +               }
1070 +       } else {
1071 +               /*
1072 +                * Send all queued packets.
1073 +                */
1074 +               if (_vtpm_send_queued(chip) == 0) {
1075 +
1076 +                       vtpms->current_request = t;
1077 +
1078 +                       rc = vtpm_vd_send(vtpms->tpm_private,
1079 +                                         buf,
1080 +                                         count,
1081 +                                         t);
1082 +                       /*
1083 +                        * The generic TPM driver will call
1084 +                        * the function to receive the response.
1085 +                        */
1086 +                       if (rc < 0) {
1087 +                               vtpms->current_request = NULL;
1088 +                               goto queue_it;
1089 +                       }
1090 +               } else {
1091 +queue_it:
1092 +                       if (!transmission_set_req_buffer(t, buf, count)) {
1093 +                               transmission_free(t);
1094 +                               rc = -ENOMEM;
1095 +                               goto exit;
1096 +                       }
1097 +                       /*
1098 +                        * An error occurred. Don't even try
1099 +                        * to send the current request. Just
1100 +                        * queue it.
1101 +                        */
1102 +                       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1103 +                       vtpms->flags |= DATAEX_FLAG_QUEUED_ONLY;
1104 +                       list_add_tail(&t->next, &vtpms->queued_requests);
1105 +                       spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1106 +               }
1107 +       }
1108 +
1109 +exit:
1110 +       return rc;
1111 +}
1112 +
1113 +
1114 +/*
1115 + * Send all queued requests.
1116 + */
1117 +static int _vtpm_send_queued(struct tpm_chip *chip)
1118 +{
1119 +       int rc;
1120 +       int error = 0;
1121 +       long flags;
1122 +       unsigned char buffer[1];
1123 +       struct vtpm_state *vtpms;
1124 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1125 +
1126 +       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1127 +
1128 +       while (!list_empty(&vtpms->queued_requests)) {
1129 +               /*
1130 +                * Need to dequeue them.
1131 +                * Read the result into a dummy buffer.
1132 +                */
1133 +               struct transmission *qt = (struct transmission *)
1134 +                                         vtpms->queued_requests.next;
1135 +               list_del(&qt->next);
1136 +               vtpms->current_request = qt;
1137 +               spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1138 +
1139 +               rc = vtpm_vd_send(vtpms->tpm_private,
1140 +                                 qt->request,
1141 +                                 qt->request_len,
1142 +                                 qt);
1143 +
1144 +               if (rc < 0) {
1145 +                       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1146 +                       if ((qt = vtpms->current_request) != NULL) {
1147 +                               /*
1148 +                                * requeue it at the beginning
1149 +                                * of the list
1150 +                                */
1151 +                               list_add(&qt->next,
1152 +                                        &vtpms->queued_requests);
1153 +                       }
1154 +                       vtpms->current_request = NULL;
1155 +                       error = 1;
1156 +                       break;
1157 +               }
1158 +               /*
1159 +                * After this point qt is not valid anymore!
1160 +                * It is freed when the front-end is delivering
1161 +                * the data by calling tpm_recv
1162 +                */
1163 +               /*
1164 +                * Receive response into provided dummy buffer
1165 +                */
1166 +               rc = vtpm_recv(chip, buffer, sizeof(buffer));
1167 +               spin_lock_irqsave(&vtpms->req_list_lock, flags);
1168 +       }
1169 +
1170 +       spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1171 +
1172 +       return error;
1173 +}
1174 +
1175 +static void vtpm_cancel(struct tpm_chip *chip)
1176 +{
1177 +       unsigned long flags;
1178 +       struct vtpm_state *vtpms = (struct vtpm_state *)chip_get_private(chip);
1179 +
1180 +       spin_lock_irqsave(&vtpms->resp_list_lock,flags);
1181 +
1182 +       if (!vtpms->current_response && vtpms->current_request) {
1183 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1184 +               interruptible_sleep_on(&vtpms->resp_wait_queue);
1185 +               spin_lock_irqsave(&vtpms->resp_list_lock,flags);
1186 +       }
1187 +
1188 +       if (vtpms->current_response) {
1189 +               struct transmission *t = vtpms->current_response;
1190 +               vtpms->current_response = NULL;
1191 +               transmission_free(t);
1192 +       }
1193 +
1194 +       spin_unlock_irqrestore(&vtpms->resp_list_lock,flags);
1195 +}
1196 +
1197 +static u8 vtpm_status(struct tpm_chip *chip)
1198 +{
1199 +       u8 rc = 0;
1200 +       unsigned long flags;
1201 +       struct vtpm_state *vtpms;
1202 +
1203 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1204 +
1205 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1206 +       /*
1207 +        * Data are available if:
1208 +        *  - there's a current response
1209 +        *  - the last packet was queued only (this is fake, but necessary to
1210 +        *      get the generic TPM layer to call the receive function.)
1211 +        */
1212 +       if (vtpms->current_response ||
1213 +           0 != (vtpms->flags & DATAEX_FLAG_QUEUED_ONLY)) {
1214 +               rc = STATUS_DATA_AVAIL;
1215 +       } else if (!vtpms->current_response && !vtpms->current_request) {
1216 +               rc = STATUS_READY;
1217 +       }
1218 +
1219 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1220 +       return rc;
1221 +}
1222 +
1223 +static struct file_operations vtpm_ops = {
1224 +       .owner = THIS_MODULE,
1225 +       .llseek = no_llseek,
1226 +       .open = tpm_open,
1227 +       .read = tpm_read,
1228 +       .write = tpm_write,
1229 +       .release = tpm_release,
1230 +};
1231 +
1232 +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
1233 +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
1234 +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
1235 +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
1236 +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
1237 +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
1238 +                  NULL);
1239 +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
1240 +static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel);
1241 +
1242 +static struct attribute *vtpm_attrs[] = {
1243 +       &dev_attr_pubek.attr,
1244 +       &dev_attr_pcrs.attr,
1245 +       &dev_attr_enabled.attr,
1246 +       &dev_attr_active.attr,
1247 +       &dev_attr_owned.attr,
1248 +       &dev_attr_temp_deactivated.attr,
1249 +       &dev_attr_caps.attr,
1250 +       &dev_attr_cancel.attr,
1251 +       NULL,
1252 +};
1253 +
1254 +static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs };
1255 +
1256 +#define TPM_LONG_TIMEOUT   (10 * 60 * HZ)
1257 +
1258 +static struct tpm_vendor_specific tpm_vtpm = {
1259 +       .recv = vtpm_recv,
1260 +       .send = vtpm_send,
1261 +       .cancel = vtpm_cancel,
1262 +       .status = vtpm_status,
1263 +       .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL,
1264 +       .req_complete_val  = STATUS_DATA_AVAIL,
1265 +       .req_canceled = STATUS_READY,
1266 +       .attr_group = &vtpm_attr_grp,
1267 +       .miscdev = {
1268 +               .fops = &vtpm_ops,
1269 +       },
1270 +       .duration = {
1271 +               TPM_LONG_TIMEOUT,
1272 +               TPM_LONG_TIMEOUT,
1273 +               TPM_LONG_TIMEOUT,
1274 +       },
1275 +};
1276 +
1277 +struct tpm_chip *init_vtpm(struct device *dev,
1278 +                           struct tpm_private *tp)
1279 +{
1280 +       long rc;
1281 +       struct tpm_chip *chip;
1282 +       struct vtpm_state *vtpms;
1283 +
1284 +       vtpms = kzalloc(sizeof(struct vtpm_state), GFP_KERNEL);
1285 +       if (!vtpms)
1286 +               return ERR_PTR(-ENOMEM);
1287 +
1288 +       vtpm_state_init(vtpms);
1289 +       vtpms->tpm_private = tp;
1290 +
1291 +       chip = tpm_register_hardware(dev, &tpm_vtpm);
1292 +       if (!chip) {
1293 +               rc = -ENODEV;
1294 +               goto err_free_mem;
1295 +       }
1296 +
1297 +       chip_set_private(chip, vtpms);
1298 +
1299 +       return chip;
1300 +
1301 +err_free_mem:
1302 +       kfree(vtpms);
1303 +
1304 +       return ERR_PTR(rc);
1305 +}
1306 +
1307 +void cleanup_vtpm(struct device *dev)
1308 +{
1309 +       struct tpm_chip *chip = dev_get_drvdata(dev);
1310 +       struct vtpm_state *vtpms = (struct vtpm_state*)chip_get_private(chip);
1311 +       tpm_remove_hardware(dev);
1312 +       kfree(vtpms);
1313 +}
1314 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1315 +++ head-2011-07-21/drivers/char/tpm/tpm_vtpm.h 2011-01-31 14:53:38.000000000 +0100
1316 @@ -0,0 +1,55 @@
1317 +#ifndef TPM_VTPM_H
1318 +#define TPM_VTPM_H
1319 +
1320 +struct tpm_chip;
1321 +struct tpm_private;
1322 +
1323 +struct vtpm_state {
1324 +       struct transmission *current_request;
1325 +       spinlock_t           req_list_lock;
1326 +       wait_queue_head_t    req_wait_queue;
1327 +
1328 +       struct list_head     queued_requests;
1329 +
1330 +       struct transmission *current_response;
1331 +       spinlock_t           resp_list_lock;
1332 +       wait_queue_head_t    resp_wait_queue;     // processes waiting for responses
1333 +
1334 +       u8                   vd_status;
1335 +       u8                   flags;
1336 +
1337 +       unsigned long        disconnect_time;
1338 +
1339 +       /*
1340 +        * The following is a private structure of the underlying
1341 +        * driver. It is passed as parameter in the send function.
1342 +        */
1343 +       struct tpm_private *tpm_private;
1344 +};
1345 +
1346 +
1347 +enum vdev_status {
1348 +       TPM_VD_STATUS_DISCONNECTED = 0x0,
1349 +       TPM_VD_STATUS_CONNECTED = 0x1
1350 +};
1351 +
1352 +/* this function is called from tpm_vtpm.c */
1353 +int vtpm_vd_send(struct tpm_private * tp,
1354 +                 const u8 * buf, size_t count, void *ptr);
1355 +
1356 +/* these functions are offered by tpm_vtpm.c */
1357 +struct tpm_chip *init_vtpm(struct device *,
1358 +                           struct tpm_private *);
1359 +void cleanup_vtpm(struct device *);
1360 +int vtpm_vd_recv(const struct tpm_chip* chip,
1361 +                 const unsigned char *buffer, size_t count, void *ptr);
1362 +void vtpm_vd_status(const struct tpm_chip *, u8 status);
1363 +
1364 +static inline struct tpm_private *tpm_private_from_dev(struct device *dev)
1365 +{
1366 +       struct tpm_chip *chip = dev_get_drvdata(dev);
1367 +       struct vtpm_state *vtpms = chip_get_private(chip);
1368 +       return vtpms->tpm_private;
1369 +}
1370 +
1371 +#endif
1372 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1373 +++ head-2011-07-21/drivers/char/tpm/tpm_xen.c  2011-01-31 14:53:38.000000000 +0100
1374 @@ -0,0 +1,722 @@
1375 +/*
1376 + * Copyright (c) 2005, IBM Corporation
1377 + *
1378 + * Author: Stefan Berger, stefanb@us.ibm.com
1379 + * Grant table support: Mahadevan Gomathisankaran
1380 + *
1381 + * This code has been derived from drivers/xen/netfront/netfront.c
1382 + *
1383 + * Copyright (c) 2002-2004, K A Fraser
1384 + *
1385 + * This program is free software; you can redistribute it and/or
1386 + * modify it under the terms of the GNU General Public License version 2
1387 + * as published by the Free Software Foundation; or, when distributed
1388 + * separately from the Linux kernel or incorporated into other
1389 + * software packages, subject to the following license:
1390 + *
1391 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1392 + * of this source file (the "Software"), to deal in the Software without
1393 + * restriction, including without limitation the rights to use, copy, modify,
1394 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1395 + * and to permit persons to whom the Software is furnished to do so, subject to
1396 + * the following conditions:
1397 + *
1398 + * The above copyright notice and this permission notice shall be included in
1399 + * all copies or substantial portions of the Software.
1400 + *
1401 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1402 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1403 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1404 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1405 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1406 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1407 + * IN THE SOFTWARE.
1408 + */
1409 +
1410 +#include <linux/errno.h>
1411 +#include <linux/err.h>
1412 +#include <linux/interrupt.h>
1413 +#include <linux/mutex.h>
1414 +#include <asm/uaccess.h>
1415 +#include <xen/evtchn.h>
1416 +#include <xen/interface/grant_table.h>
1417 +#include <xen/interface/io/tpmif.h>
1418 +#include <xen/gnttab.h>
1419 +#include <xen/xenbus.h>
1420 +#include "tpm.h"
1421 +#include "tpm_vtpm.h"
1422 +
1423 +#undef DEBUG
1424 +
1425 +/* local structures */
1426 +struct tpm_private {
1427 +       struct tpm_chip *chip;
1428 +
1429 +       tpmif_tx_interface_t *tx;
1430 +       atomic_t refcnt;
1431 +       unsigned int irq;
1432 +       u8 is_connected;
1433 +       u8 is_suspended;
1434 +
1435 +       spinlock_t tx_lock;
1436 +
1437 +       struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE];
1438 +
1439 +       atomic_t tx_busy;
1440 +       void *tx_remember;
1441 +
1442 +       domid_t backend_id;
1443 +       wait_queue_head_t wait_q;
1444 +
1445 +       struct xenbus_device *dev;
1446 +       int ring_ref;
1447 +};
1448 +
1449 +struct tx_buffer {
1450 +       unsigned int size;      // available space in data
1451 +       unsigned int len;       // used space in data
1452 +       unsigned char *data;    // pointer to a page
1453 +};
1454 +
1455 +
1456 +/* locally visible variables */
1457 +static grant_ref_t gref_head;
1458 +static struct tpm_private *my_priv;
1459 +
1460 +/* local function prototypes */
1461 +static irqreturn_t tpmif_int(int irq,
1462 +                             void *tpm_priv,
1463 +                             struct pt_regs *ptregs);
1464 +static void tpmif_rx_action(unsigned long unused);
1465 +static int tpmif_connect(struct xenbus_device *dev,
1466 +                         struct tpm_private *tp,
1467 +                         domid_t domid);
1468 +static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0);
1469 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp);
1470 +static void tpmif_free_tx_buffers(struct tpm_private *tp);
1471 +static void tpmif_set_connected_state(struct tpm_private *tp,
1472 +                                      u8 newstate);
1473 +static int tpm_xmit(struct tpm_private *tp,
1474 +                    const u8 * buf, size_t count, int userbuffer,
1475 +                    void *remember);
1476 +static void destroy_tpmring(struct tpm_private *tp);
1477 +void __exit tpmif_exit(void);
1478 +
1479 +#define DPRINTK(fmt, args...) \
1480 +    pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
1481 +#define IPRINTK(fmt, args...) \
1482 +    printk(KERN_INFO "xen_tpm_fr: " fmt, ##args)
1483 +#define WPRINTK(fmt, args...) \
1484 +    printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args)
1485 +
1486 +#define GRANT_INVALID_REF      0
1487 +
1488 +
1489 +static inline int
1490 +tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len,
1491 +               int isuserbuffer)
1492 +{
1493 +       int copied = len;
1494 +
1495 +       if (len > txb->size)
1496 +               copied = txb->size;
1497 +       if (isuserbuffer) {
1498 +               if (copy_from_user(txb->data, src, copied))
1499 +                       return -EFAULT;
1500 +       } else {
1501 +               memcpy(txb->data, src, copied);
1502 +       }
1503 +       txb->len = len;
1504 +       return copied;
1505 +}
1506 +
1507 +static inline struct tx_buffer *tx_buffer_alloc(void)
1508 +{
1509 +       struct tx_buffer *txb;
1510 +
1511 +       txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL);
1512 +       if (!txb)
1513 +               return NULL;
1514 +
1515 +       txb->len = 0;
1516 +       txb->size = PAGE_SIZE;
1517 +       txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
1518 +       if (txb->data == NULL) {
1519 +               kfree(txb);
1520 +               txb = NULL;
1521 +       }
1522 +
1523 +       return txb;
1524 +}
1525 +
1526 +
1527 +static inline void tx_buffer_free(struct tx_buffer *txb)
1528 +{
1529 +       if (txb) {
1530 +               free_page((long)txb->data);
1531 +               kfree(txb);
1532 +       }
1533 +}
1534 +
1535 +/**************************************************************
1536 + Utility functions for the tpm_private structure
1537 +**************************************************************/
1538 +static void tpm_private_init(struct tpm_private *tp)
1539 +{
1540 +       spin_lock_init(&tp->tx_lock);
1541 +       init_waitqueue_head(&tp->wait_q);
1542 +       atomic_set(&tp->refcnt, 1);
1543 +}
1544 +
1545 +static void tpm_private_put(void)
1546 +{
1547 +       if (!atomic_dec_and_test(&my_priv->refcnt))
1548 +               return;
1549 +
1550 +       tpmif_free_tx_buffers(my_priv);
1551 +       kfree(my_priv);
1552 +       my_priv = NULL;
1553 +}
1554 +
1555 +static struct tpm_private *tpm_private_get(void)
1556 +{
1557 +       int err;
1558 +
1559 +       if (my_priv) {
1560 +               atomic_inc(&my_priv->refcnt);
1561 +               return my_priv;
1562 +       }
1563 +
1564 +       my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
1565 +       if (!my_priv)
1566 +               return NULL;
1567 +
1568 +       tpm_private_init(my_priv);
1569 +       err = tpmif_allocate_tx_buffers(my_priv);
1570 +       if (err < 0)
1571 +               tpm_private_put();
1572 +
1573 +       return my_priv;
1574 +}
1575 +
1576 +/**************************************************************
1577 +
1578 + The interface to let the tpm plugin register its callback
1579 + function and send data to another partition using this module
1580 +
1581 +**************************************************************/
1582 +
1583 +static DEFINE_MUTEX(suspend_lock);
1584 +/*
1585 + * Send data via this module by calling this function
1586 + */
1587 +int vtpm_vd_send(struct tpm_private *tp,
1588 +                 const u8 * buf, size_t count, void *ptr)
1589 +{
1590 +       int sent;
1591 +
1592 +       mutex_lock(&suspend_lock);
1593 +       sent = tpm_xmit(tp, buf, count, 0, ptr);
1594 +       mutex_unlock(&suspend_lock);
1595 +
1596 +       return sent;
1597 +}
1598 +
1599 +/**************************************************************
1600 + XENBUS support code
1601 +**************************************************************/
1602 +
1603 +static int setup_tpmring(struct xenbus_device *dev,
1604 +                         struct tpm_private *tp)
1605 +{
1606 +       tpmif_tx_interface_t *sring;
1607 +       int err;
1608 +
1609 +       tp->ring_ref = GRANT_INVALID_REF;
1610 +
1611 +       sring = (void *)__get_free_page(GFP_KERNEL);
1612 +       if (!sring) {
1613 +               xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
1614 +               return -ENOMEM;
1615 +       }
1616 +       tp->tx = sring;
1617 +
1618 +       err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx));
1619 +       if (err < 0) {
1620 +               free_page((unsigned long)sring);
1621 +               tp->tx = NULL;
1622 +               xenbus_dev_fatal(dev, err, "allocating grant reference");
1623 +               goto fail;
1624 +       }
1625 +       tp->ring_ref = err;
1626 +
1627 +       err = tpmif_connect(dev, tp, dev->otherend_id);
1628 +       if (err)
1629 +               goto fail;
1630 +
1631 +       return 0;
1632 +fail:
1633 +       destroy_tpmring(tp);
1634 +       return err;
1635 +}
1636 +
1637 +
1638 +static void destroy_tpmring(struct tpm_private *tp)
1639 +{
1640 +       tpmif_set_connected_state(tp, 0);
1641 +
1642 +       if (tp->ring_ref != GRANT_INVALID_REF) {
1643 +               gnttab_end_foreign_access(tp->ring_ref, (unsigned long)tp->tx);
1644 +               tp->ring_ref = GRANT_INVALID_REF;
1645 +               tp->tx = NULL;
1646 +       }
1647 +
1648 +       if (tp->irq)
1649 +               unbind_from_irqhandler(tp->irq, tp);
1650 +
1651 +       tp->irq = 0;
1652 +}
1653 +
1654 +
1655 +static int talk_to_backend(struct xenbus_device *dev,
1656 +                           struct tpm_private *tp)
1657 +{
1658 +       const char *message = NULL;
1659 +       int err;
1660 +       struct xenbus_transaction xbt;
1661 +
1662 +       err = setup_tpmring(dev, tp);
1663 +       if (err) {
1664 +               xenbus_dev_fatal(dev, err, "setting up ring");
1665 +               goto out;
1666 +       }
1667 +
1668 +again:
1669 +       err = xenbus_transaction_start(&xbt);
1670 +       if (err) {
1671 +               xenbus_dev_fatal(dev, err, "starting transaction");
1672 +               goto destroy_tpmring;
1673 +       }
1674 +
1675 +       err = xenbus_printf(xbt, dev->nodename,
1676 +                           "ring-ref","%u", tp->ring_ref);
1677 +       if (err) {
1678 +               message = "writing ring-ref";
1679 +               goto abort_transaction;
1680 +       }
1681 +
1682 +       err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
1683 +                           irq_to_evtchn_port(tp->irq));
1684 +       if (err) {
1685 +               message = "writing event-channel";
1686 +               goto abort_transaction;
1687 +       }
1688 +
1689 +       err = xenbus_transaction_end(xbt, 0);
1690 +       if (err == -EAGAIN)
1691 +               goto again;
1692 +       if (err) {
1693 +               xenbus_dev_fatal(dev, err, "completing transaction");
1694 +               goto destroy_tpmring;
1695 +       }
1696 +
1697 +       xenbus_switch_state(dev, XenbusStateConnected);
1698 +
1699 +       return 0;
1700 +
1701 +abort_transaction:
1702 +       xenbus_transaction_end(xbt, 1);
1703 +       if (message)
1704 +               xenbus_dev_error(dev, err, "%s", message);
1705 +destroy_tpmring:
1706 +       destroy_tpmring(tp);
1707 +out:
1708 +       return err;
1709 +}
1710 +
1711 +/**
1712 + * Callback received when the backend's state changes.
1713 + */
1714 +static void backend_changed(struct xenbus_device *dev,
1715 +                           enum xenbus_state backend_state)
1716 +{
1717 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1718 +       DPRINTK("\n");
1719 +
1720 +       switch (backend_state) {
1721 +       case XenbusStateInitialising:
1722 +       case XenbusStateInitWait:
1723 +       case XenbusStateInitialised:
1724 +       case XenbusStateReconfiguring:
1725 +       case XenbusStateReconfigured:
1726 +       case XenbusStateUnknown:
1727 +               break;
1728 +
1729 +       case XenbusStateConnected:
1730 +               tpmif_set_connected_state(tp, 1);
1731 +               break;
1732 +
1733 +       case XenbusStateClosing:
1734 +               tpmif_set_connected_state(tp, 0);
1735 +               xenbus_frontend_closed(dev);
1736 +               break;
1737 +
1738 +       case XenbusStateClosed:
1739 +               tpmif_set_connected_state(tp, 0);
1740 +               if (tp->is_suspended == 0)
1741 +                       device_unregister(&dev->dev);
1742 +               xenbus_frontend_closed(dev);
1743 +               break;
1744 +       }
1745 +}
1746 +
1747 +static int tpmfront_probe(struct xenbus_device *dev,
1748 +                          const struct xenbus_device_id *id)
1749 +{
1750 +       int err;
1751 +       int handle;
1752 +       struct tpm_private *tp = tpm_private_get();
1753 +
1754 +       if (!tp)
1755 +               return -ENOMEM;
1756 +
1757 +       tp->chip = init_vtpm(&dev->dev, tp);
1758 +       if (IS_ERR(tp->chip))
1759 +               return PTR_ERR(tp->chip);
1760 +
1761 +       err = xenbus_scanf(XBT_NIL, dev->nodename,
1762 +                          "handle", "%i", &handle);
1763 +       if (XENBUS_EXIST_ERR(err))
1764 +               return err;
1765 +
1766 +       if (err < 0) {
1767 +               xenbus_dev_fatal(dev, err, "reading handle");
1768 +               return err;
1769 +       }
1770 +
1771 +       tp->dev = dev;
1772 +
1773 +       err = talk_to_backend(dev, tp);
1774 +       if (err) {
1775 +               tpm_private_put();
1776 +               return err;
1777 +       }
1778 +
1779 +       return 0;
1780 +}
1781 +
1782 +
1783 +static int tpmfront_remove(struct xenbus_device *dev)
1784 +{
1785 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1786 +       destroy_tpmring(tp);
1787 +       cleanup_vtpm(&dev->dev);
1788 +       return 0;
1789 +}
1790 +
1791 +static int tpmfront_suspend(struct xenbus_device *dev)
1792 +{
1793 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1794 +       u32 ctr;
1795 +
1796 +       /* Take the lock, preventing any application from sending. */
1797 +       mutex_lock(&suspend_lock);
1798 +       tp->is_suspended = 1;
1799 +
1800 +       for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) {
1801 +               if ((ctr % 10) == 0)
1802 +                       printk("TPM-FE [INFO]: Waiting for outstanding "
1803 +                              "request.\n");
1804 +               /* Wait for a request to be responded to. */
1805 +               interruptible_sleep_on_timeout(&tp->wait_q, 100);
1806 +       }
1807 +
1808 +       return 0;
1809 +}
1810 +
1811 +static int tpmfront_suspend_finish(struct tpm_private *tp)
1812 +{
1813 +       tp->is_suspended = 0;
1814 +       /* Allow applications to send again. */
1815 +       mutex_unlock(&suspend_lock);
1816 +       return 0;
1817 +}
1818 +
1819 +static int tpmfront_suspend_cancel(struct xenbus_device *dev)
1820 +{
1821 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1822 +       return tpmfront_suspend_finish(tp);
1823 +}
1824 +
1825 +static int tpmfront_resume(struct xenbus_device *dev)
1826 +{
1827 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1828 +       destroy_tpmring(tp);
1829 +       return talk_to_backend(dev, tp);
1830 +}
1831 +
1832 +static int tpmif_connect(struct xenbus_device *dev,
1833 +                         struct tpm_private *tp,
1834 +                         domid_t domid)
1835 +{
1836 +       int err;
1837 +
1838 +       tp->backend_id = domid;
1839 +
1840 +       err = bind_listening_port_to_irqhandler(
1841 +               domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp);
1842 +       if (err <= 0) {
1843 +               WPRINTK("bind_listening_port_to_irqhandler failed "
1844 +                       "(err=%d)\n", err);
1845 +               return err;
1846 +       }
1847 +       tp->irq = err;
1848 +
1849 +       return 0;
1850 +}
1851 +
1852 +static struct xenbus_device_id tpmfront_ids[] = {
1853 +       { "vtpm" },
1854 +       { "" }
1855 +};
1856 +
1857 +static struct xenbus_driver tpmfront = {
1858 +       .name = "vtpm",
1859 +       .owner = THIS_MODULE,
1860 +       .ids = tpmfront_ids,
1861 +       .probe = tpmfront_probe,
1862 +       .remove =  tpmfront_remove,
1863 +       .resume = tpmfront_resume,
1864 +       .otherend_changed = backend_changed,
1865 +       .suspend = tpmfront_suspend,
1866 +       .suspend_cancel = tpmfront_suspend_cancel,
1867 +};
1868 +
1869 +static void __init init_tpm_xenbus(void)
1870 +{
1871 +       xenbus_register_frontend(&tpmfront);
1872 +}
1873 +
1874 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp)
1875 +{
1876 +       unsigned int i;
1877 +
1878 +       for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
1879 +               tp->tx_buffers[i] = tx_buffer_alloc();
1880 +               if (!tp->tx_buffers[i]) {
1881 +                       tpmif_free_tx_buffers(tp);
1882 +                       return -ENOMEM;
1883 +               }
1884 +       }
1885 +       return 0;
1886 +}
1887 +
1888 +static void tpmif_free_tx_buffers(struct tpm_private *tp)
1889 +{
1890 +       unsigned int i;
1891 +
1892 +       for (i = 0; i < TPMIF_TX_RING_SIZE; i++)
1893 +               tx_buffer_free(tp->tx_buffers[i]);
1894 +}
1895 +
1896 +static void tpmif_rx_action(unsigned long priv)
1897 +{
1898 +       struct tpm_private *tp = (struct tpm_private *)priv;
1899 +       int i = 0;
1900 +       unsigned int received;
1901 +       unsigned int offset = 0;
1902 +       u8 *buffer;
1903 +       tpmif_tx_request_t *tx = &tp->tx->ring[i].req;
1904 +
1905 +       atomic_set(&tp->tx_busy, 0);
1906 +       wake_up_interruptible(&tp->wait_q);
1907 +
1908 +       received = tx->size;
1909 +
1910 +       buffer = kmalloc(received, GFP_ATOMIC);
1911 +       if (!buffer)
1912 +               return;
1913 +
1914 +       for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) {
1915 +               struct tx_buffer *txb = tp->tx_buffers[i];
1916 +               tpmif_tx_request_t *tx;
1917 +               unsigned int tocopy;
1918 +
1919 +               tx = &tp->tx->ring[i].req;
1920 +               tocopy = tx->size;
1921 +               if (tocopy > PAGE_SIZE)
1922 +                       tocopy = PAGE_SIZE;
1923 +
1924 +               memcpy(&buffer[offset], txb->data, tocopy);
1925 +
1926 +               gnttab_release_grant_reference(&gref_head, tx->ref);
1927 +
1928 +               offset += tocopy;
1929 +       }
1930 +
1931 +       vtpm_vd_recv(tp->chip, buffer, received, tp->tx_remember);
1932 +       kfree(buffer);
1933 +}
1934 +
1935 +
1936 +static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
1937 +{
1938 +       struct tpm_private *tp = tpm_priv;
1939 +       unsigned long flags;
1940 +
1941 +       spin_lock_irqsave(&tp->tx_lock, flags);
1942 +       tpmif_rx_tasklet.data = (unsigned long)tp;
1943 +       tasklet_schedule(&tpmif_rx_tasklet);
1944 +       spin_unlock_irqrestore(&tp->tx_lock, flags);
1945 +
1946 +       return IRQ_HANDLED;
1947 +}
1948 +
1949 +
1950 +static int tpm_xmit(struct tpm_private *tp,
1951 +                    const u8 * buf, size_t count, int isuserbuffer,
1952 +                    void *remember)
1953 +{
1954 +       tpmif_tx_request_t *tx;
1955 +       TPMIF_RING_IDX i;
1956 +       unsigned int offset = 0;
1957 +
1958 +       spin_lock_irq(&tp->tx_lock);
1959 +
1960 +       if (unlikely(atomic_read(&tp->tx_busy))) {
1961 +               printk("tpm_xmit: There's an outstanding request/response "
1962 +                      "on the way!\n");
1963 +               spin_unlock_irq(&tp->tx_lock);
1964 +               return -EBUSY;
1965 +       }
1966 +
1967 +       if (tp->is_connected != 1) {
1968 +               spin_unlock_irq(&tp->tx_lock);
1969 +               return -EIO;
1970 +       }
1971 +
1972 +       for (i = 0; count > 0 && i < TPMIF_TX_RING_SIZE; i++) {
1973 +               struct tx_buffer *txb = tp->tx_buffers[i];
1974 +               int copied;
1975 +
1976 +               if (!txb) {
1977 +                       DPRINTK("txb (i=%d) is NULL. buffers initialized?\n"
1978 +                               "Not transmitting anything!\n", i);
1979 +                       spin_unlock_irq(&tp->tx_lock);
1980 +                       return -EFAULT;
1981 +               }
1982 +
1983 +               copied = tx_buffer_copy(txb, &buf[offset], count,
1984 +                                       isuserbuffer);
1985 +               if (copied < 0) {
1986 +                       /* An error occurred */
1987 +                       spin_unlock_irq(&tp->tx_lock);
1988 +                       return copied;
1989 +               }
1990 +               count -= copied;
1991 +               offset += copied;
1992 +
1993 +               tx = &tp->tx->ring[i].req;
1994 +               tx->addr = virt_to_machine(txb->data);
1995 +               tx->size = txb->len;
1996 +               tx->unused = 0;
1997 +
1998 +               DPRINTK("First 4 characters sent by TPM-FE are "
1999 +                       "0x%02x 0x%02x 0x%02x 0x%02x\n",
2000 +                       txb->data[0],txb->data[1],txb->data[2],txb->data[3]);
2001 +
2002 +               /* Get the granttable reference for this page. */
2003 +               tx->ref = gnttab_claim_grant_reference(&gref_head);
2004 +               if (tx->ref == -ENOSPC) {
2005 +                       spin_unlock_irq(&tp->tx_lock);
2006 +                       DPRINTK("Grant table claim reference failed in "
2007 +                               "func:%s line:%d file:%s\n",
2008 +                               __FUNCTION__, __LINE__, __FILE__);
2009 +                       return -ENOSPC;
2010 +               }
2011 +               gnttab_grant_foreign_access_ref(tx->ref,
2012 +                                               tp->backend_id,
2013 +                                               virt_to_mfn(txb->data),
2014 +                                               0 /*RW*/);
2015 +               wmb();
2016 +       }
2017 +
2018 +       atomic_set(&tp->tx_busy, 1);
2019 +       tp->tx_remember = remember;
2020 +
2021 +       mb();
2022 +
2023 +       notify_remote_via_irq(tp->irq);
2024 +
2025 +       spin_unlock_irq(&tp->tx_lock);
2026 +       return offset;
2027 +}
2028 +
2029 +
2030 +static void tpmif_notify_upperlayer(struct tpm_private *tp)
2031 +{
2032 +       /* Notify upper layer about the state of the connection to the BE. */
2033 +       vtpm_vd_status(tp->chip, (tp->is_connected
2034 +                                 ? TPM_VD_STATUS_CONNECTED
2035 +                                 : TPM_VD_STATUS_DISCONNECTED));
2036 +}
2037 +
2038 +
2039 +static void tpmif_set_connected_state(struct tpm_private *tp, u8 is_connected)
2040 +{
2041 +       /*
2042 +        * Don't notify upper layer if we are in suspend mode and
2043 +        * should disconnect - the assumption is that we will resume.
2044 +        * The mutex keeps apps from sending.
2045 +        */
2046 +       if (is_connected == 0 && tp->is_suspended == 1)
2047 +               return;
2048 +
2049 +       /*
2050 +        * Unlock the mutex if we are connected again
2051 +        * after being suspended - now resuming.
2052 +        * This also removes the suspend state.
2053 +        */
2054 +       if (is_connected == 1 && tp->is_suspended == 1)
2055 +               tpmfront_suspend_finish(tp);
2056 +
2057 +       if (is_connected != tp->is_connected) {
2058 +               tp->is_connected = is_connected;
2059 +               tpmif_notify_upperlayer(tp);
2060 +       }
2061 +}
2062 +
2063 +
2064 +
2065 +/* =================================================================
2066 + * Initialization function.
2067 + * =================================================================
2068 + */
2069 +
2070 +
2071 +static int __init tpmif_init(void)
2072 +{
2073 +       struct tpm_private *tp;
2074 +
2075 +       if (is_initial_xendomain())
2076 +               return -EPERM;
2077 +
2078 +       tp = tpm_private_get();
2079 +       if (!tp)
2080 +               return -ENOMEM;
2081 +
2082 +       IPRINTK("Initialising the vTPM driver.\n");
2083 +       if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE,
2084 +                                         &gref_head) < 0) {
2085 +               tpm_private_put();
2086 +               return -EFAULT;
2087 +       }
2088 +
2089 +       init_tpm_xenbus();
2090 +       return 0;
2091 +}
2092 +
2093 +
2094 +module_init(tpmif_init);
2095 +
2096 +MODULE_LICENSE("Dual BSD/GPL");
2097 --- head-2011-07-21.orig/drivers/edac/edac_mc.c 2011-07-21 12:00:01.000000000 +0200
2098 +++ head-2011-07-21/drivers/edac/edac_mc.c      2011-06-30 15:37:01.000000000 +0200
2099 @@ -611,6 +611,10 @@ static void edac_mc_scrub_block(unsigned
2100  
2101         debugf3("%s()\n", __func__);
2102  
2103 +#ifdef CONFIG_XEN
2104 +       page = mfn_to_local_pfn(page);
2105 +#endif
2106 +
2107         /* ECC error page was not in our memory. Ignore it. */
2108         if (!pfn_valid(page))
2109                 return;
2110 --- head-2011-07-21.orig/drivers/firmware/dell_rbu.c    2011-07-21 12:00:01.000000000 +0200
2111 +++ head-2011-07-21/drivers/firmware/dell_rbu.c 2011-04-13 11:25:29.000000000 +0200
2112 @@ -170,9 +170,28 @@ static int create_packet(void *data, siz
2113                         spin_lock(&rbu_data.lock);
2114                         goto out_alloc_packet_array;
2115                 }
2116 +#ifdef CONFIG_XEN
2117 +               if (ordernum && xen_create_contiguous_region(
2118 +                       (unsigned long)packet_data_temp_buf, ordernum, 0)) {
2119 +                       free_pages((unsigned long)packet_data_temp_buf,
2120 +                                  ordernum);
2121 +                       printk(KERN_WARNING
2122 +                               "dell_rbu:%s: failed to adjust new "
2123 +                               "packet\n", __func__);
2124 +                       retval = -ENOMEM;
2125 +                       spin_lock(&rbu_data.lock);
2126 +                       goto out_alloc_packet_array;
2127 +               }
2128 +#endif
2129  
2130 -               if ((unsigned long)virt_to_phys(packet_data_temp_buf)
2131 +               if ((unsigned long)virt_to_bus(packet_data_temp_buf)
2132                                 < allocation_floor) {
2133 +#ifdef CONFIG_XEN
2134 +                       if (ordernum)
2135 +                               xen_destroy_contiguous_region(
2136 +                                       (unsigned long)packet_data_temp_buf,
2137 +                                       ordernum);
2138 +#endif
2139                         pr_debug("packet 0x%lx below floor at 0x%lx.\n",
2140                                         (unsigned long)virt_to_phys(
2141                                                 packet_data_temp_buf),
2142 @@ -186,7 +205,7 @@ static int create_packet(void *data, siz
2143         newpacket->data = packet_data_temp_buf;
2144  
2145         pr_debug("create_packet: newpacket at physical addr %lx\n",
2146 -               (unsigned long)virt_to_phys(newpacket->data));
2147 +               (unsigned long)virt_to_bus(newpacket->data));
2148  
2149         /* packets may not have fixed size */
2150         newpacket->length = length;
2151 @@ -205,7 +224,7 @@ out_alloc_packet_array:
2152         /* always free packet array */
2153         for (;idx>0;idx--) {
2154                 pr_debug("freeing unused packet below floor 0x%lx.\n",
2155 -                       (unsigned long)virt_to_phys(
2156 +                       (unsigned long)virt_to_bus(
2157                                 invalid_addr_packet_array[idx-1]));
2158                 free_pages((unsigned long)invalid_addr_packet_array[idx-1],
2159                         ordernum);
2160 @@ -349,6 +368,13 @@ static void packet_empty_list(void)
2161                  * to make sure there are no stale RBU packets left in memory
2162                  */
2163                 memset(newpacket->data, 0, rbu_data.packetsize);
2164 +#ifdef CONFIG_XEN
2165 +               if (newpacket->ordernum)
2166 +                       xen_destroy_contiguous_region(
2167 +                               (unsigned long)newpacket->data,
2168 +                               newpacket->ordernum);
2169 +#endif
2170 +
2171                 free_pages((unsigned long) newpacket->data,
2172                         newpacket->ordernum);
2173                 kfree(newpacket);
2174 @@ -403,7 +429,9 @@ static int img_update_realloc(unsigned l
2175  {
2176         unsigned char *image_update_buffer = NULL;
2177         unsigned long rc;
2178 +#ifndef CONFIG_XEN
2179         unsigned long img_buf_phys_addr;
2180 +#endif
2181         int ordernum;
2182         int dma_alloc = 0;
2183  
2184 @@ -434,15 +462,19 @@ static int img_update_realloc(unsigned l
2185  
2186         spin_unlock(&rbu_data.lock);
2187  
2188 +#ifndef CONFIG_XEN
2189         ordernum = get_order(size);
2190         image_update_buffer =
2191                 (unsigned char *) __get_free_pages(GFP_KERNEL, ordernum);
2192  
2193         img_buf_phys_addr =
2194 -               (unsigned long) virt_to_phys(image_update_buffer);
2195 +               (unsigned long) virt_to_bus(image_update_buffer);
2196  
2197         if (img_buf_phys_addr > BIOS_SCAN_LIMIT) {
2198                 free_pages((unsigned long) image_update_buffer, ordernum);
2199 +#else
2200 +       {
2201 +#endif
2202                 ordernum = -1;
2203                 image_update_buffer = dma_alloc_coherent(NULL, size,
2204                         &dell_rbu_dmaaddr, GFP_KERNEL);
2205 @@ -695,6 +727,12 @@ static struct bin_attribute rbu_packet_s
2206  static int __init dcdrbu_init(void)
2207  {
2208         int rc;
2209 +
2210 +#ifdef CONFIG_XEN
2211 +       if (!is_initial_xendomain())
2212 +               return -ENODEV;
2213 +#endif
2214 +
2215         spin_lock_init(&rbu_data.lock);
2216  
2217         init_packet_head();
2218 --- head-2011-07-21.orig/drivers/ide/ide-lib.c  2011-07-21 12:00:01.000000000 +0200
2219 +++ head-2011-07-21/drivers/ide/ide-lib.c       2011-04-13 11:25:29.000000000 +0200
2220 @@ -18,12 +18,12 @@ void ide_toggle_bounce(ide_drive_t *driv
2221  {
2222         u64 addr = BLK_BOUNCE_HIGH;     /* dma64_addr_t */
2223  
2224 -       if (!PCI_DMA_BUS_IS_PHYS) {
2225 -               addr = BLK_BOUNCE_ANY;
2226 -       } else if (on && drive->media == ide_disk) {
2227 +       if (on && drive->media == ide_disk) {
2228                 struct device *dev = drive->hwif->dev;
2229  
2230 -               if (dev && dev->dma_mask)
2231 +               if (!PCI_DMA_BUS_IS_PHYS)
2232 +                       addr = BLK_BOUNCE_ANY;
2233 +               else if (dev && dev->dma_mask)
2234                         addr = *dev->dma_mask;
2235         }
2236  
2237 --- head-2011-07-21.orig/drivers/oprofile/buffer_sync.c 2011-07-21 12:00:01.000000000 +0200
2238 +++ head-2011-07-21/drivers/oprofile/buffer_sync.c      2011-06-30 15:37:10.000000000 +0200
2239 @@ -8,6 +8,10 @@
2240   * @author Barry Kasindorf
2241   * @author Robert Richter <robert.richter@amd.com>
2242   *
2243 + * Modified by Aravind Menon for Xen
2244 + * These modifications are:
2245 + * Copyright (C) 2005 Hewlett-Packard Co.
2246 + *
2247   * This is the core of the buffer management. Each
2248   * CPU buffer is processed and entered into the
2249   * global event buffer. Such processing is necessary
2250 @@ -43,6 +47,8 @@ static cpumask_var_t marked_cpus;
2251  static DEFINE_SPINLOCK(task_mortuary);
2252  static void process_task_mortuary(void);
2253  
2254 +static int cpu_current_domain[NR_CPUS];
2255 +
2256  /* Take ownership of the task struct and place it on the
2257   * list for processing. Only after two full buffer syncs
2258   * does the task eventually get freed, because by then
2259 @@ -61,7 +67,6 @@ task_free_notify(struct notifier_block *
2260         return NOTIFY_OK;
2261  }
2262  
2263 -
2264  /* The task is on its way out. A sync of the buffer means we can catch
2265   * any remaining samples for this task.
2266   */
2267 @@ -151,6 +156,11 @@ static void free_all_tasks(void)
2268  int sync_start(void)
2269  {
2270         int err;
2271 +       int i;
2272 +
2273 +       for (i = 0; i < NR_CPUS; i++) {
2274 +               cpu_current_domain[i] = COORDINATOR_DOMAIN;
2275 +       }
2276  
2277         if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
2278                 return -ENOMEM;
2279 @@ -287,13 +297,29 @@ static void add_cpu_switch(int i)
2280         last_cookie = INVALID_COOKIE;
2281  }
2282  
2283 -static void add_kernel_ctx_switch(unsigned int in_kernel)
2284 +static void add_cpu_mode_switch(unsigned int cpu_mode)
2285  {
2286         add_event_entry(ESCAPE_CODE);
2287 -       if (in_kernel)
2288 +       switch (cpu_mode) {
2289 +       case CPU_MODE_USER:
2290 +               add_event_entry(USER_ENTER_SWITCH_CODE);
2291 +               break;
2292 +       case CPU_MODE_KERNEL:
2293                 add_event_entry(KERNEL_ENTER_SWITCH_CODE);
2294 -       else
2295 -               add_event_entry(KERNEL_EXIT_SWITCH_CODE);
2296 +               break;
2297 +       case CPU_MODE_XEN:
2298 +               add_event_entry(XEN_ENTER_SWITCH_CODE);
2299 +               break;
2300 +       default:
2301 +               break;
2302 +       }
2303 +}
2304 +
2305 +static void add_domain_switch(unsigned long domain_id)
2306 +{
2307 +       add_event_entry(ESCAPE_CODE);
2308 +       add_event_entry(DOMAIN_SWITCH_CODE);
2309 +       add_event_entry(domain_id);
2310  }
2311  
2312  static void
2313 @@ -374,12 +400,12 @@ static inline void add_sample_entry(unsi
2314   * for later lookup from userspace. Return 0 on failure.
2315   */
2316  static int
2317 -add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
2318 +add_sample(struct mm_struct *mm, struct op_sample *s, int cpu_mode)
2319  {
2320         unsigned long cookie;
2321         off_t offset;
2322  
2323 -       if (in_kernel) {
2324 +       if (cpu_mode >= CPU_MODE_KERNEL) {
2325                 add_sample_entry(s->eip, s->event);
2326                 return 1;
2327         }
2328 @@ -504,9 +530,10 @@ void sync_buffer(int cpu)
2329         unsigned long val;
2330         struct task_struct *new;
2331         unsigned long cookie = 0;
2332 -       int in_kernel = 1;
2333 +       int cpu_mode = CPU_MODE_KERNEL;
2334         sync_buffer_state state = sb_buffer_start;
2335         unsigned int i;
2336 +       int domain_switch = 0;
2337         unsigned long available;
2338         unsigned long flags;
2339         struct op_entry entry;
2340 @@ -516,6 +543,11 @@ void sync_buffer(int cpu)
2341  
2342         add_cpu_switch(cpu);
2343  
2344 +       /* We need to assign the first samples in this CPU buffer to the
2345 +          same domain that we were processing at the last sync_buffer */
2346 +       if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN)
2347 +               add_domain_switch(cpu_current_domain[cpu]);
2348 +
2349         op_cpu_buffer_reset(cpu);
2350         available = op_cpu_buffer_entries(cpu);
2351  
2352 @@ -524,6 +556,13 @@ void sync_buffer(int cpu)
2353                 if (!sample)
2354                         break;
2355  
2356 +               if (domain_switch) {
2357 +                       cpu_current_domain[cpu] = sample->eip;
2358 +                       add_domain_switch(sample->eip);
2359 +                       domain_switch = 0;
2360 +                       continue;
2361 +               }
2362 +
2363                 if (is_code(sample->eip)) {
2364                         flags = sample->event;
2365                         if (flags & TRACE_BEGIN) {
2366 @@ -532,10 +571,10 @@ void sync_buffer(int cpu)
2367                         }
2368                         if (flags & KERNEL_CTX_SWITCH) {
2369                                 /* kernel/userspace switch */
2370 -                               in_kernel = flags & IS_KERNEL;
2371 +                               cpu_mode = flags & CPU_MODE_MASK;
2372                                 if (state == sb_buffer_start)
2373                                         state = sb_sample_start;
2374 -                               add_kernel_ctx_switch(flags & IS_KERNEL);
2375 +                               add_cpu_mode_switch(cpu_mode);
2376                         }
2377                         if (flags & USER_CTX_SWITCH
2378                             && op_cpu_buffer_get_data(&entry, &val)) {
2379 @@ -548,16 +587,23 @@ void sync_buffer(int cpu)
2380                                         cookie = get_exec_dcookie(mm);
2381                                 add_user_ctx_switch(new, cookie);
2382                         }
2383 +                       if (flags & DOMAIN_SWITCH)
2384 +                               domain_switch = 1;
2385                         if (op_cpu_buffer_get_size(&entry))
2386                                 add_data(&entry, mm);
2387                         continue;
2388                 }
2389  
2390 +               if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
2391 +                       add_sample_entry(sample->eip, sample->event);
2392 +                       continue;
2393 +               }
2394 +
2395                 if (state < sb_bt_start)
2396                         /* ignore sample */
2397                         continue;
2398  
2399 -               if (add_sample(mm, sample, in_kernel))
2400 +               if (add_sample(mm, sample, cpu_mode))
2401                         continue;
2402  
2403                 /* ignore backtraces if failed to add a sample */
2404 @@ -568,6 +614,10 @@ void sync_buffer(int cpu)
2405         }
2406         release_mm(mm);
2407  
2408 +       /* We reset domain to COORDINATOR at each CPU switch */
2409 +       if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN)
2410 +               add_domain_switch(COORDINATOR_DOMAIN);
2411 +
2412         mark_done(cpu);
2413  
2414         mutex_unlock(&buffer_mutex);
2415 --- head-2011-07-21.orig/drivers/oprofile/cpu_buffer.c  2011-07-21 12:00:01.000000000 +0200
2416 +++ head-2011-07-21/drivers/oprofile/cpu_buffer.c       2011-04-13 11:25:29.000000000 +0200
2417 @@ -8,6 +8,10 @@
2418   * @author Barry Kasindorf <barry.kasindorf@amd.com>
2419   * @author Robert Richter <robert.richter@amd.com>
2420   *
2421 + * Modified by Aravind Menon for Xen
2422 + * These modifications are:
2423 + * Copyright (C) 2005 Hewlett-Packard Co.
2424 + *
2425   * Each CPU has a local buffer that stores PC value/event
2426   * pairs. We also log context switches when we notice them.
2427   * Eventually each CPU's buffer is processed into the global
2428 @@ -38,6 +42,8 @@ static void wq_sync_buffer(struct work_s
2429  #define DEFAULT_TIMER_EXPIRE (HZ / 10)
2430  static int work_enabled;
2431  
2432 +static int32_t current_domain = COORDINATOR_DOMAIN;
2433 +
2434  unsigned long oprofile_get_cpu_buffer_size(void)
2435  {
2436         return oprofile_cpu_buffer_size;
2437 @@ -75,7 +81,7 @@ int alloc_cpu_buffers(void)
2438                 struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
2439  
2440                 b->last_task = NULL;
2441 -               b->last_is_kernel = -1;
2442 +               b->last_cpu_mode = -1;
2443                 b->tracing = 0;
2444                 b->buffer_size = buffer_size;
2445                 b->sample_received = 0;
2446 @@ -180,7 +186,7 @@ unsigned long op_cpu_buffer_entries(int 
2447  
2448  static int
2449  op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
2450 -           int is_kernel, struct task_struct *task)
2451 +           int cpu_mode, struct task_struct *task)
2452  {
2453         struct op_entry entry;
2454         struct op_sample *sample;
2455 @@ -193,16 +199,15 @@ op_add_code(struct oprofile_cpu_buffer *
2456                 flags |= TRACE_BEGIN;
2457  
2458         /* notice a switch from user->kernel or vice versa */
2459 -       is_kernel = !!is_kernel;
2460 -       if (cpu_buf->last_is_kernel != is_kernel) {
2461 -               cpu_buf->last_is_kernel = is_kernel;
2462 -               flags |= KERNEL_CTX_SWITCH;
2463 -               if (is_kernel)
2464 -                       flags |= IS_KERNEL;
2465 +       if (cpu_buf->last_cpu_mode != cpu_mode) {
2466 +               cpu_buf->last_cpu_mode = cpu_mode;
2467 +               flags |= KERNEL_CTX_SWITCH | cpu_mode;
2468         }
2469  
2470         /* notice a task switch */
2471 -       if (cpu_buf->last_task != task) {
2472 +       /* if not processing other domain samples */
2473 +       if (cpu_buf->last_task != task &&
2474 +           current_domain == COORDINATOR_DOMAIN) {
2475                 cpu_buf->last_task = task;
2476                 flags |= USER_CTX_SWITCH;
2477         }
2478 @@ -251,14 +256,14 @@ op_add_sample(struct oprofile_cpu_buffer
2479  /*
2480   * This must be safe from any context.
2481   *
2482 - * is_kernel is needed because on some architectures you cannot
2483 + * cpu_mode is needed because on some architectures you cannot
2484   * tell if you are in kernel or user space simply by looking at
2485 - * pc. We tag this in the buffer by generating kernel enter/exit
2486 - * events whenever is_kernel changes
2487 + * pc. We tag this in the buffer by generating kernel/user (and
2488 + * xen) enter events whenever cpu_mode changes
2489   */
2490  static int
2491  log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
2492 -          unsigned long backtrace, int is_kernel, unsigned long event,
2493 +          unsigned long backtrace, int cpu_mode, unsigned long event,
2494            struct task_struct *task)
2495  {
2496         struct task_struct *tsk = task ? task : current;
2497 @@ -269,7 +274,7 @@ log_sample(struct oprofile_cpu_buffer *c
2498                 return 0;
2499         }
2500  
2501 -       if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
2502 +       if (op_add_code(cpu_buf, backtrace, cpu_mode, tsk))
2503                 goto fail;
2504  
2505         if (op_add_sample(cpu_buf, pc, event))
2506 @@ -440,6 +445,25 @@ fail:
2507         return;
2508  }
2509  
2510 +int oprofile_add_domain_switch(int32_t domain_id)
2511 +{
2512 +       struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
2513 +
2514 +       /* should have space for switching into and out of domain
2515 +          (2 slots each) plus one sample and one cpu mode switch */
2516 +       if (((nr_available_slots(cpu_buf) < 6) &&
2517 +            (domain_id != COORDINATOR_DOMAIN)) ||
2518 +           (nr_available_slots(cpu_buf) < 2))
2519 +               return 0;
2520 +
2521 +       add_code(cpu_buf, DOMAIN_SWITCH);
2522 +       add_sample(cpu_buf, domain_id, 0);
2523 +
2524 +       current_domain = domain_id;
2525 +
2526 +       return 1;
2527 +}
2528 +
2529  /*
2530   * This serves to avoid cpu buffer overflow, and makes sure
2531   * the task mortuary progresses
2532 --- head-2011-07-21.orig/drivers/oprofile/cpu_buffer.h  2011-07-21 12:00:01.000000000 +0200
2533 +++ head-2011-07-21/drivers/oprofile/cpu_buffer.h       2011-04-13 11:25:29.000000000 +0200
2534 @@ -41,7 +41,7 @@ struct op_entry;
2535  struct oprofile_cpu_buffer {
2536         unsigned long buffer_size;
2537         struct task_struct *last_task;
2538 -       int last_is_kernel;
2539 +       int last_cpu_mode;
2540         int tracing;
2541         unsigned long sample_received;
2542         unsigned long sample_lost_overflow;
2543 @@ -63,7 +63,7 @@ static inline void op_cpu_buffer_reset(i
2544  {
2545         struct oprofile_cpu_buffer *cpu_buf = &per_cpu(op_cpu_buffer, cpu);
2546  
2547 -       cpu_buf->last_is_kernel = -1;
2548 +       cpu_buf->last_cpu_mode = -1;
2549         cpu_buf->last_task = NULL;
2550  }
2551  
2552 @@ -113,9 +113,13 @@ int op_cpu_buffer_get_data(struct op_ent
2553  }
2554  
2555  /* extra data flags */
2556 -#define KERNEL_CTX_SWITCH      (1UL << 0)
2557 -#define IS_KERNEL              (1UL << 1)
2558 +#define CPU_MODE_USER          0
2559 +#define CPU_MODE_KERNEL                1
2560 +#define CPU_MODE_XEN           2
2561 +#define CPU_MODE_MASK          3
2562  #define TRACE_BEGIN            (1UL << 2)
2563  #define USER_CTX_SWITCH                (1UL << 3)
2564 +#define KERNEL_CTX_SWITCH      (1UL << 4)
2565 +#define DOMAIN_SWITCH          (1UL << 5)
2566  
2567  #endif /* OPROFILE_CPU_BUFFER_H */
2568 --- head-2011-07-21.orig/drivers/oprofile/event_buffer.h        2011-07-21 12:00:01.000000000 +0200
2569 +++ head-2011-07-21/drivers/oprofile/event_buffer.h     2011-04-13 11:25:29.000000000 +0200
2570 @@ -30,6 +30,9 @@ void wake_up_buffer_waiter(void);
2571  #define INVALID_COOKIE ~0UL
2572  #define NO_COOKIE 0UL
2573  
2574 +/* Constant used to refer to coordinator domain (Xen) */
2575 +#define COORDINATOR_DOMAIN -1
2576 +
2577  extern const struct file_operations event_buffer_fops;
2578  
2579  /* mutex between sync_cpu_buffers() and the
2580 --- head-2011-07-21.orig/drivers/oprofile/oprof.c       2011-07-21 12:00:01.000000000 +0200
2581 +++ head-2011-07-21/drivers/oprofile/oprof.c    2011-04-13 11:25:29.000000000 +0200
2582 @@ -5,6 +5,10 @@
2583   * @remark Read the file COPYING
2584   *
2585   * @author John Levon <levon@movementarian.org>
2586 + *
2587 + * Modified by Aravind Menon for Xen
2588 + * These modifications are:
2589 + * Copyright (C) 2005 Hewlett-Packard Co.
2590   */
2591  
2592  #include <linux/kernel.h>
2593 @@ -35,6 +39,32 @@ static DEFINE_MUTEX(start_mutex);
2594   */
2595  static int timer = 0;
2596  
2597 +int oprofile_set_active(int active_domains[], unsigned int adomains)
2598 +{
2599 +       int err;
2600 +
2601 +       if (!oprofile_ops.set_active)
2602 +               return -EINVAL;
2603 +
2604 +       mutex_lock(&start_mutex);
2605 +       err = oprofile_ops.set_active(active_domains, adomains);
2606 +       mutex_unlock(&start_mutex);
2607 +       return err;
2608 +}
2609 +
2610 +int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
2611 +{
2612 +       int err;
2613 +
2614 +       if (!oprofile_ops.set_passive)
2615 +               return -EINVAL;
2616 +
2617 +       mutex_lock(&start_mutex);
2618 +       err = oprofile_ops.set_passive(passive_domains, pdomains);
2619 +       mutex_unlock(&start_mutex);
2620 +       return err;
2621 +}
2622 +
2623  int oprofile_setup(void)
2624  {
2625         int err;
2626 --- head-2011-07-21.orig/drivers/oprofile/oprof.h       2011-07-21 12:00:01.000000000 +0200
2627 +++ head-2011-07-21/drivers/oprofile/oprof.h    2011-04-13 11:25:29.000000000 +0200
2628 @@ -40,4 +40,7 @@ void oprofile_timer_exit(void);
2629  int oprofile_set_ulong(unsigned long *addr, unsigned long val);
2630  int oprofile_set_timeout(unsigned long time);
2631  
2632 +int oprofile_set_active(int active_domains[], unsigned int adomains);
2633 +int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
2634 +
2635  #endif /* OPROF_H */
2636 --- head-2011-07-21.orig/drivers/oprofile/oprofile_files.c      2011-07-21 12:00:01.000000000 +0200
2637 +++ head-2011-07-21/drivers/oprofile/oprofile_files.c   2011-04-13 11:25:29.000000000 +0200
2638 @@ -5,11 +5,17 @@
2639   * @remark Read the file COPYING
2640   *
2641   * @author John Levon <levon@movementarian.org>
2642 + *
2643 + * Modified by Aravind Menon for Xen
2644 + * These modifications are:
2645 + * Copyright (C) 2005 Hewlett-Packard Co.
2646   */
2647  
2648  #include <linux/fs.h>
2649  #include <linux/oprofile.h>
2650  #include <linux/jiffies.h>
2651 +#include <asm/uaccess.h>
2652 +#include <linux/ctype.h>
2653  
2654  #include "event_buffer.h"
2655  #include "oprofile_stats.h"
2656 @@ -174,6 +180,195 @@ static const struct file_operations dump
2657         .llseek         = noop_llseek,
2658  };
2659  
2660 +#define TMPBUFSIZE 512
2661 +
2662 +static unsigned int adomains = 0;
2663 +static int active_domains[MAX_OPROF_DOMAINS + 1];
2664 +static DEFINE_MUTEX(adom_mutex);
2665 +
2666 +static ssize_t adomain_write(struct file * file, char const __user * buf,
2667 +                            size_t count, loff_t * offset)
2668 +{
2669 +       char *tmpbuf;
2670 +       char *startp, *endp;
2671 +       int i;
2672 +       unsigned long val;
2673 +       ssize_t retval = count;
2674 +
2675 +       if (*offset)
2676 +               return -EINVAL;
2677 +       if (count > TMPBUFSIZE - 1)
2678 +               return -EINVAL;
2679 +
2680 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2681 +               return -ENOMEM;
2682 +
2683 +       if (copy_from_user(tmpbuf, buf, count)) {
2684 +               kfree(tmpbuf);
2685 +               return -EFAULT;
2686 +       }
2687 +       tmpbuf[count] = 0;
2688 +
2689 +       mutex_lock(&adom_mutex);
2690 +
2691 +       startp = tmpbuf;
2692 +       /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2693 +       for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2694 +               val = simple_strtoul(startp, &endp, 0);
2695 +               if (endp == startp)
2696 +                       break;
2697 +               while (ispunct(*endp) || isspace(*endp))
2698 +                       endp++;
2699 +               active_domains[i] = val;
2700 +               if (active_domains[i] != val)
2701 +                       /* Overflow, force error below */
2702 +                       i = MAX_OPROF_DOMAINS + 1;
2703 +               startp = endp;
2704 +       }
2705 +       /* Force error on trailing junk */
2706 +       adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2707 +
2708 +       kfree(tmpbuf);
2709 +
2710 +       if (adomains > MAX_OPROF_DOMAINS
2711 +           || oprofile_set_active(active_domains, adomains)) {
2712 +               adomains = 0;
2713 +               retval = -EINVAL;
2714 +       }
2715 +
2716 +       mutex_unlock(&adom_mutex);
2717 +       return retval;
2718 +}
2719 +
2720 +static ssize_t adomain_read(struct file * file, char __user * buf,
2721 +                           size_t count, loff_t * offset)
2722 +{
2723 +       char * tmpbuf;
2724 +       size_t len;
2725 +       int i;
2726 +       ssize_t retval;
2727 +
2728 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2729 +               return -ENOMEM;
2730 +
2731 +       mutex_lock(&adom_mutex);
2732 +
2733 +       len = 0;
2734 +       for (i = 0; i < adomains; i++)
2735 +               len += snprintf(tmpbuf + len,
2736 +                               len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2737 +                               "%u ", active_domains[i]);
2738 +       WARN_ON(len > TMPBUFSIZE);
2739 +       if (len != 0 && len <= TMPBUFSIZE)
2740 +               tmpbuf[len-1] = '\n';
2741 +
2742 +       mutex_unlock(&adom_mutex);
2743 +
2744 +       retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2745 +
2746 +       kfree(tmpbuf);
2747 +       return retval;
2748 +}
2749 +
2750 +
2751 +static const struct file_operations active_domain_ops = {
2752 +       .read           = adomain_read,
2753 +       .write          = adomain_write,
2754 +};
2755 +
2756 +static unsigned int pdomains = 0;
2757 +static int passive_domains[MAX_OPROF_DOMAINS];
2758 +static DEFINE_MUTEX(pdom_mutex);
2759 +
2760 +static ssize_t pdomain_write(struct file * file, char const __user * buf,
2761 +                            size_t count, loff_t * offset)
2762 +{
2763 +       char *tmpbuf;
2764 +       char *startp, *endp;
2765 +       int i;
2766 +       unsigned long val;
2767 +       ssize_t retval = count;
2768 +
2769 +       if (*offset)
2770 +               return -EINVAL;
2771 +       if (count > TMPBUFSIZE - 1)
2772 +               return -EINVAL;
2773 +
2774 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2775 +               return -ENOMEM;
2776 +
2777 +       if (copy_from_user(tmpbuf, buf, count)) {
2778 +               kfree(tmpbuf);
2779 +               return -EFAULT;
2780 +       }
2781 +       tmpbuf[count] = 0;
2782 +
2783 +       mutex_lock(&pdom_mutex);
2784 +
2785 +       startp = tmpbuf;
2786 +       /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2787 +       for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2788 +               val = simple_strtoul(startp, &endp, 0);
2789 +               if (endp == startp)
2790 +                       break;
2791 +               while (ispunct(*endp) || isspace(*endp))
2792 +                       endp++;
2793 +               passive_domains[i] = val;
2794 +               if (passive_domains[i] != val)
2795 +                       /* Overflow, force error below */
2796 +                       i = MAX_OPROF_DOMAINS + 1;
2797 +               startp = endp;
2798 +       }
2799 +       /* Force error on trailing junk */
2800 +       pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2801 +
2802 +       kfree(tmpbuf);
2803 +
2804 +       if (pdomains > MAX_OPROF_DOMAINS
2805 +           || oprofile_set_passive(passive_domains, pdomains)) {
2806 +               pdomains = 0;
2807 +               retval = -EINVAL;
2808 +       }
2809 +
2810 +       mutex_unlock(&pdom_mutex);
2811 +       return retval;
2812 +}
2813 +
2814 +static ssize_t pdomain_read(struct file * file, char __user * buf,
2815 +                           size_t count, loff_t * offset)
2816 +{
2817 +       char * tmpbuf;
2818 +       size_t len;
2819 +       int i;
2820 +       ssize_t retval;
2821 +
2822 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2823 +               return -ENOMEM;
2824 +
2825 +       mutex_lock(&pdom_mutex);
2826 +
2827 +       len = 0;
2828 +       for (i = 0; i < pdomains; i++)
2829 +               len += snprintf(tmpbuf + len,
2830 +                               len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2831 +                               "%u ", passive_domains[i]);
2832 +       WARN_ON(len > TMPBUFSIZE);
2833 +       if (len != 0 && len <= TMPBUFSIZE)
2834 +               tmpbuf[len-1] = '\n';
2835 +
2836 +       mutex_unlock(&pdom_mutex);
2837 +
2838 +       retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2839 +
2840 +       kfree(tmpbuf);
2841 +       return retval;
2842 +}
2843 +
2844 +static const struct file_operations passive_domain_ops = {
2845 +       .read           = pdomain_read,
2846 +       .write          = pdomain_write,
2847 +};
2848 +
2849  void oprofile_create_files(struct super_block *sb, struct dentry *root)
2850  {
2851         /* reinitialize default values */
2852 @@ -184,6 +379,8 @@ void oprofile_create_files(struct super_
2853  
2854         oprofilefs_create_file(sb, root, "enable", &enable_fops);
2855         oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
2856 +       oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
2857 +       oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
2858         oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
2859         oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size);
2860         oprofilefs_create_ulong(sb, root, "buffer_watershed", &oprofile_buffer_watershed);
2861 --- head-2011-07-21.orig/fs/aio.c       2011-07-21 12:00:01.000000000 +0200
2862 +++ head-2011-07-21/fs/aio.c    2011-04-13 11:25:29.000000000 +0200
2863 @@ -39,6 +39,11 @@
2864  #include <asm/kmap_types.h>
2865  #include <asm/uaccess.h>
2866  
2867 +#ifdef CONFIG_EPOLL
2868 +#include <linux/poll.h>
2869 +#include <linux/eventpoll.h>
2870 +#endif
2871 +
2872  #if DEBUG > 1
2873  #define dprintk                printk
2874  #else
2875 @@ -991,6 +996,11 @@ put_rq:
2876         if (waitqueue_active(&ctx->wait))
2877                 wake_up(&ctx->wait);
2878  
2879 +#ifdef CONFIG_EPOLL
2880 +       if (ctx->file && waitqueue_active(&ctx->poll_wait))
2881 +               wake_up(&ctx->poll_wait);
2882 +#endif
2883 +
2884         spin_unlock_irqrestore(&ctx->ctx_lock, flags);
2885         return ret;
2886  }
2887 @@ -999,6 +1009,8 @@ EXPORT_SYMBOL(aio_complete);
2888  /* aio_read_evt
2889   *     Pull an event off of the ioctx's event ring.  Returns the number of 
2890   *     events fetched (0 or 1 ;-)
2891 + *     If ent parameter is 0, just returns the number of events that would
2892 + *     be fetched.
2893   *     FIXME: make this use cmpxchg.
2894   *     TODO: make the ringbuffer user mmap()able (requires FIXME).
2895   */
2896 @@ -1021,13 +1033,18 @@ static int aio_read_evt(struct kioctx *i
2897  
2898         head = ring->head % info->nr;
2899         if (head != ring->tail) {
2900 -               struct io_event *evp = aio_ring_event(info, head, KM_USER1);
2901 -               *ent = *evp;
2902 -               head = (head + 1) % info->nr;
2903 -               smp_mb(); /* finish reading the event before updatng the head */
2904 -               ring->head = head;
2905 -               ret = 1;
2906 -               put_aio_ring_event(evp, KM_USER1);
2907 +               if (ent) { /* event requested */
2908 +                       struct io_event *evp =
2909 +                               aio_ring_event(info, head, KM_USER1);
2910 +                       *ent = *evp;
2911 +                       head = (head + 1) % info->nr;
2912 +                       /* finish reading the event before updating the head */
2913 +                       smp_mb();
2914 +                       ring->head = head;
2915 +                       ret = 1;
2916 +                       put_aio_ring_event(evp, KM_USER1);
2917 +               } else /* only need to know availability */
2918 +                       ret = 1;
2919         }
2920         spin_unlock(&info->ring_lock);
2921  
2922 @@ -1212,6 +1229,13 @@ static void io_destroy(struct kioctx *io
2923  
2924         aio_cancel_all(ioctx);
2925         wait_for_all_aios(ioctx);
2926 +#ifdef CONFIG_EPOLL
2927 +       /* forget the poll file, but it's up to the user to close it */
2928 +       if (ioctx->file) {
2929 +               ioctx->file->private_data = 0;
2930 +               ioctx->file = 0;
2931 +       }
2932 +#endif
2933  
2934         /*
2935          * Wake up any waiters.  The setting of ctx->dead must be seen
2936 @@ -1222,6 +1246,67 @@ static void io_destroy(struct kioctx *io
2937         put_ioctx(ioctx);       /* once for the lookup */
2938  }
2939  
2940 +#ifdef CONFIG_EPOLL
2941 +
2942 +static int aio_queue_fd_close(struct inode *inode, struct file *file)
2943 +{
2944 +       struct kioctx *ioctx = file->private_data;
2945 +       if (ioctx) {
2946 +               file->private_data = 0;
2947 +               spin_lock_irq(&ioctx->ctx_lock);
2948 +               ioctx->file = 0;
2949 +               spin_unlock_irq(&ioctx->ctx_lock);
2950 +       }
2951 +       return 0;
2952 +}
2953 +
2954 +static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
2955 +{      unsigned int pollflags = 0;
2956 +       struct kioctx *ioctx = file->private_data;
2957 +
2958 +       if (ioctx) {
2959 +
2960 +               spin_lock_irq(&ioctx->ctx_lock);
2961 +               /* Insert inside our poll wait queue */
2962 +               poll_wait(file, &ioctx->poll_wait, wait);
2963 +
2964 +               /* Check our condition */
2965 +               if (aio_read_evt(ioctx, 0))
2966 +                       pollflags = POLLIN | POLLRDNORM;
2967 +               spin_unlock_irq(&ioctx->ctx_lock);
2968 +       }
2969 +
2970 +       return pollflags;
2971 +}
2972 +
2973 +static const struct file_operations aioq_fops = {
2974 +       .release        = aio_queue_fd_close,
2975 +       .poll           = aio_queue_fd_poll
2976 +};
2977 +
2978 +/* make_aio_fd:
2979 + *  Create a file descriptor that can be used to poll the event queue.
2980 + *  Based and piggybacked on the excellent epoll code.
2981 + *  Based on and piggybacked on the excellent epoll code.
2982 +
2983 +static int make_aio_fd(struct kioctx *ioctx)
2984 +{
2985 +       int error, fd;
2986 +       struct inode *inode;
2987 +       struct file *file;
2988 +
2989 +       error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
2990 +       if (error)
2991 +               return error;
2992 +
2993 +       /* associate the file with the IO context */
2994 +       file->private_data = ioctx;
2995 +       ioctx->file = file;
2996 +       init_waitqueue_head(&ioctx->poll_wait);
2997 +       return fd;
2998 +}
2999 +#endif
3000 +
3001  /* sys_io_setup:
3002   *     Create an aio_context capable of receiving at least nr_events.
3003   *     ctxp must not point to an aio_context that already exists, and
3004 @@ -1234,18 +1319,30 @@ static void io_destroy(struct kioctx *io
3005   *     resources are available.  May fail with -EFAULT if an invalid
3006   *     pointer is passed for ctxp.  Will fail with -ENOSYS if not
3007   *     implemented.
3008 + *
3009 + *     To request a selectable fd, the user context has to be initialized
3010 + *     to 1, instead of 0, and the return value is the fd.
3011 + *     This keeps the system call compatible, since a non-zero value
3012 + *     was not allowed so far.
3013   */
3014  SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
3015  {
3016         struct kioctx *ioctx = NULL;
3017         unsigned long ctx;
3018         long ret;
3019 +       int make_fd = 0;
3020  
3021         ret = get_user(ctx, ctxp);
3022         if (unlikely(ret))
3023                 goto out;
3024  
3025         ret = -EINVAL;
3026 +#ifdef CONFIG_EPOLL
3027 +       if (ctx == 1) {
3028 +               make_fd = 1;
3029 +               ctx = 0;
3030 +       }
3031 +#endif
3032         if (unlikely(ctx || nr_events == 0)) {
3033                 pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
3034                          ctx, nr_events);
3035 @@ -1256,8 +1353,12 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_e
3036         ret = PTR_ERR(ioctx);
3037         if (!IS_ERR(ioctx)) {
3038                 ret = put_user(ioctx->user_id, ctxp);
3039 -               if (!ret)
3040 -                       return 0;
3041 +#ifdef CONFIG_EPOLL
3042 +               if (make_fd && ret >= 0)
3043 +                       ret = make_aio_fd(ioctx);
3044 +#endif
3045 +               if (ret >= 0)
3046 +                       return ret;
3047  
3048                 get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
3049                 io_destroy(ioctx);
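The io_setup() comment above documents the selectable-fd convention added under
CONFIG_EPOLL: userspace initializes the context value to 1 before the call, the
context id is then written back through ctxp as usual, and the return value is
a file descriptor that can be polled for completed events. A minimal userspace
sketch (an illustration only, not part of the patch):

	/* hypothetical helper, for illustration only */
	#include <poll.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <linux/aio_abi.h>

	static int setup_pollable_aio(unsigned int nr_events, aio_context_t *ctxp)
	{
		struct pollfd pfd;
		int fd;

		*ctxp = 1;	/* request the selectable fd variant */
		fd = syscall(__NR_io_setup, nr_events, ctxp);
		if (fd < 0)
			return -1;
		/* *ctxp now holds the aio context id */

		pfd.fd = fd;
		pfd.events = POLLIN;
		poll(&pfd, 1, 0);	/* POLLIN is set once events are queued */
		return fd;
	}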
3050 --- head-2011-07-21.orig/fs/compat_ioctl.c      2011-07-21 12:00:01.000000000 +0200
3051 +++ head-2011-07-21/fs/compat_ioctl.c   2011-04-13 11:25:29.000000000 +0200
3052 @@ -114,6 +114,13 @@
3053  #include <asm/fbio.h>
3054  #endif
3055  
3056 +#ifdef CONFIG_XEN