1 Subject: xen3 common
2 From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 1066:647366713aef)
3 Patch-mainline: n/a
4 Acked-by: jbeulich@novell.com
5
6 List of files that no longer require modification (and hence were
7 removed from this patch), kept for reference and in case upstream
8 wants to take the forward-porting patches:
9 2.6.19/include/linux/skbuff.h
10 2.6.19/net/core/dev.c
11 2.6.19/net/core/skbuff.c
12 2.6.19/net/ipv4/netfilter/nf_nat_proto_tcp.c
13 2.6.19/net/ipv4/netfilter/nf_nat_proto_udp.c
14 2.6.19/net/ipv4/xfrm4_output.c
15 2.6.22/include/linux/sched.h
16 2.6.22/kernel/softlockup.c
17 2.6.22/kernel/timer.c
18 2.6.25/mm/highmem.c
19 2.6.30/include/linux/pci_regs.h
20
21 --- head-2011-02-08.orig/drivers/Makefile       2011-01-31 12:42:59.000000000 +0100
22 +++ head-2011-02-08/drivers/Makefile    2011-01-31 14:53:38.000000000 +0100
23 @@ -35,6 +35,7 @@ obj-$(CONFIG_PARPORT)         += parport/
24  obj-y                          += base/ block/ misc/ mfd/ nfc/
25  obj-$(CONFIG_NUBUS)            += nubus/
26  obj-y                          += macintosh/
27 +obj-$(CONFIG_XEN)              += xen/
28  obj-$(CONFIG_IDE)              += ide/
29  obj-$(CONFIG_SCSI)             += scsi/
30  obj-$(CONFIG_ATA)              += ata/
31 --- head-2011-02-08.orig/drivers/acpi/Makefile  2011-01-31 12:42:04.000000000 +0100
32 +++ head-2011-02-08/drivers/acpi/Makefile       2011-01-31 14:53:38.000000000 +0100
33 @@ -67,6 +67,9 @@ obj-$(CONFIG_ACPI_EC_DEBUGFS) += ec_sys.
34  processor-y                    := processor_driver.o processor_throttling.o
35  processor-y                    += processor_idle.o processor_thermal.o
36  processor-$(CONFIG_CPU_FREQ)   += processor_perflib.o
37 +ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
38 +processor-objs += processor_perflib.o processor_extcntl.o
39 +endif
40  
41  obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
42  obj-$(CONFIG_ACPI_IPMI)                += acpi_ipmi.o
43 --- head-2011-02-08.orig/drivers/acpi/acpica/hwsleep.c  2011-01-31 12:42:53.000000000 +0100
44 +++ head-2011-02-08/drivers/acpi/acpica/hwsleep.c       2011-01-31 14:53:38.000000000 +0100
45 @@ -236,7 +236,11 @@ acpi_status asmlinkage acpi_enter_sleep_
46         u32 pm1b_control;
47         struct acpi_bit_register_info *sleep_type_reg_info;
48         struct acpi_bit_register_info *sleep_enable_reg_info;
49 +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
50         u32 in_value;
51 +#else
52 +       int err;
53 +#endif
54         struct acpi_object_list arg_list;
55         union acpi_object arg;
56         acpi_status status;
57 @@ -347,6 +351,7 @@ acpi_status asmlinkage acpi_enter_sleep_
58  
59         /* Write #2: Write both SLP_TYP + SLP_EN */
60  
61 +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
62         status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control);
63         if (ACPI_FAILURE(status)) {
64                 return_ACPI_STATUS(status);
65 @@ -386,6 +391,16 @@ acpi_status asmlinkage acpi_enter_sleep_
66                 /* Spin until we wake */
67  
68         } while (!in_value);
69 +#else
70 +       /* PV ACPI just needs to check the hypercall return value */
71 +       err = acpi_notify_hypervisor_state(sleep_state,
72 +                       pm1a_control, pm1b_control);
73 +       if (err) {
74 +               ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
75 +                                 "Hypervisor failure [%d]\n", err));
76 +               return_ACPI_STATUS(AE_ERROR);
77 +       }
78 +#endif
79  
80         return_ACPI_STATUS(AE_OK);
81  }
82 --- head-2011-02-08.orig/drivers/acpi/processor_driver.c        2011-01-31 12:42:04.000000000 +0100
83 +++ head-2011-02-08/drivers/acpi/processor_driver.c     2011-01-31 14:53:38.000000000 +0100
84 @@ -325,7 +325,8 @@ static int acpi_processor_get_info(struc
85          */
86         if (pr->id == -1) {
87                 if (ACPI_FAILURE
88 -                   (acpi_processor_hotadd_init(pr->handle, &pr->id))) {
89 +                   (acpi_processor_hotadd_init(pr->handle, &pr->id)) &&
90 +                   !processor_cntl_external()) {
91                         return -ENODEV;
92                 }
93         }
94 @@ -376,7 +377,11 @@ static int acpi_processor_get_info(struc
95         return 0;
96  }
97  
98 +#ifndef CONFIG_XEN
99  static DEFINE_PER_CPU(void *, processor_device_array);
100 +#else
101 +static void *processor_device_array[NR_ACPI_CPUS];
102 +#endif
103  
104  static void acpi_processor_notify(struct acpi_device *device, u32 event)
105  {
106 @@ -462,8 +467,11 @@ static int __cpuinit acpi_processor_add(
107         strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS);
108         device->driver_data = pr;
109  
110 +       processor_extcntl_init();
111 +
112         result = acpi_processor_get_info(device);
113 -       if (result) {
114 +       if (result ||
115 +           ((pr->id == -1) && !processor_cntl_external())) {
116                 /* Processor is physically not present */
117                 return 0;
118         }
119 @@ -473,23 +481,36 @@ static int __cpuinit acpi_processor_add(
120                 return 0;
121  #endif
122  
123 -       BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0));
124 +       BUG_ON(!processor_cntl_external() &&
125 +              ((pr->id >= nr_cpu_ids) || (pr->id < 0)));
126  
127         /*
128          * Buggy BIOS check
129          * ACPI id of processors can be reported wrongly by the BIOS.
130          * Don't trust it blindly
131          */
132 +#ifndef CONFIG_XEN
133         if (per_cpu(processor_device_array, pr->id) != NULL &&
134             per_cpu(processor_device_array, pr->id) != device) {
135 +#else
136 +       BUG_ON(pr->acpi_id >= NR_ACPI_CPUS);
137 +       if (processor_device_array[pr->acpi_id] != NULL &&
138 +           processor_device_array[pr->acpi_id] != device) {
139 +#endif
140                 printk(KERN_WARNING "BIOS reported wrong ACPI id "
141                         "for the processor\n");
142                 result = -ENODEV;
143                 goto err_free_cpumask;
144         }
145 +#ifndef CONFIG_XEN
146         per_cpu(processor_device_array, pr->id) = device;
147  
148         per_cpu(processors, pr->id) = pr;
149 +#else
150 +       processor_device_array[pr->acpi_id] = device;
151 +       if (pr->id != -1)
152 +               per_cpu(processors, pr->id) = pr;
153 +#endif
154  
155         sysdev = get_cpu_sysdev(pr->id);
156         if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
157 @@ -497,16 +518,28 @@ static int __cpuinit acpi_processor_add(
158                 goto err_free_cpumask;
159         }
160  
161 -#ifdef CONFIG_CPU_FREQ
162 +#if defined(CONFIG_CPU_FREQ) || defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
163         acpi_processor_ppc_has_changed(pr, 0);
164  #endif
165 -       acpi_processor_get_throttling_info(pr);
166 -       acpi_processor_get_limit_info(pr);
167  
168 +       /*
169 +        * pr->id may be -1 when processor_cntl_external() is enabled.
170 +        * The throttling and thermal modules don't support this case.
171 +        * So far Tx only works when the dom0 vcpu count equals the pcpu
172 +        * count, as we hand control over to dom0.
173 +        */
174 +       if (pr->id != -1) {
175 +               acpi_processor_get_throttling_info(pr);
176 +               acpi_processor_get_limit_info(pr);
177 +       }
178  
179         if (cpuidle_get_driver() == &acpi_idle_driver)
180                 acpi_processor_power_init(pr, device);
181  
182 +       result = processor_extcntl_prepare(pr);
183 +       if (result)
184 +               goto end;
185 +
186         pr->cdev = thermal_cooling_device_register("Processor", device,
187                                                 &processor_cooling_ops);
188         if (IS_ERR(pr->cdev)) {
189 @@ -556,7 +589,7 @@ static int acpi_processor_remove(struct 
190  
191         pr = acpi_driver_data(device);
192  
193 -       if (pr->id >= nr_cpu_ids)
194 +       if (!processor_cntl_external() && pr->id >= nr_cpu_ids)
195                 goto free;
196  
197         if (type == ACPI_BUS_REMOVAL_EJECT) {
198 @@ -575,8 +608,14 @@ static int acpi_processor_remove(struct 
199                 pr->cdev = NULL;
200         }
201  
202 +#ifndef CONFIG_XEN
203         per_cpu(processors, pr->id) = NULL;
204         per_cpu(processor_device_array, pr->id) = NULL;
205 +#else
206 +       if (pr->id != -1)
207 +               per_cpu(processors, pr->id) = NULL;
208 +       processor_device_array[pr->acpi_id] = NULL;
209 +#endif
210  
211  free:
212         free_cpumask_var(pr->throttling.shared_cpu_map);
213 @@ -632,6 +671,10 @@ int acpi_processor_device_add(acpi_handl
214                 return -ENODEV;
215         }
216  
217 +       if (processor_cntl_external() && acpi_driver_data(*device))
218 +               processor_notify_external(acpi_driver_data(*device),
219 +                       PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD);
220 +
221         return 0;
222  }
223  
224 @@ -661,6 +704,10 @@ static void __ref acpi_processor_hotplug
225                                             "Unable to add the device\n");
226                         break;
227                 }
228 +               pr = acpi_driver_data(device);
229 +               if (processor_cntl_external() && pr)
230 +                       processor_notify_external(pr,
231 +                                       PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD);
232                 break;
233         case ACPI_NOTIFY_EJECT_REQUEST:
234                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
235 @@ -677,6 +724,9 @@ static void __ref acpi_processor_hotplug
236                                     "Driver data is NULL, dropping EJECT\n");
237                         return;
238                 }
239 +               if (processor_cntl_external())
240 +                       processor_notify_external(pr, PROCESSOR_HOTPLUG,
241 +                                               HOTPLUG_TYPE_REMOVE);
242                 break;
243         default:
244                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
245 @@ -741,6 +791,11 @@ static acpi_status acpi_processor_hotadd
246  
247  static int acpi_processor_handle_eject(struct acpi_processor *pr)
248  {
249 +#ifdef CONFIG_XEN
250 +       if (pr->id == -1)
251 +               return (0);
252 +#endif
253 +
254         if (cpu_online(pr->id))
255                 cpu_down(pr->id);
256  
257 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
258 +++ head-2011-02-08/drivers/acpi/processor_extcntl.c    2011-01-31 14:53:38.000000000 +0100
259 @@ -0,0 +1,241 @@
260 +/*
261 + * processor_extcntl.c - channel to external control logic
262 + *
263 + *  Copyright (C) 2008, Intel corporation
264 + *
265 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
266 + *
267 + *  This program is free software; you can redistribute it and/or modify
268 + *  it under the terms of the GNU General Public License as published by
269 + *  the Free Software Foundation; either version 2 of the License, or (at
270 + *  your option) any later version.
271 + *
272 + *  This program is distributed in the hope that it will be useful, but
273 + *  WITHOUT ANY WARRANTY; without even the implied warranty of
274 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
275 + *  General Public License for more details.
276 + *
277 + *  You should have received a copy of the GNU General Public License along
278 + *  with this program; if not, write to the Free Software Foundation, Inc.,
279 + *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
280 + *
281 + */
282 +
283 +#include <linux/kernel.h>
284 +#include <linux/init.h>
285 +#include <linux/types.h>
286 +#include <linux/acpi.h>
287 +#include <linux/pm.h>
288 +#include <linux/cpu.h>
289 +
290 +#include <acpi/processor.h>
291 +
292 +#define ACPI_PROCESSOR_COMPONENT        0x01000000
293 +#define ACPI_PROCESSOR_CLASS            "processor"
294 +#define ACPI_PROCESSOR_DRIVER_NAME      "ACPI Processor Driver"
295 +#define _COMPONENT              ACPI_PROCESSOR_COMPONENT
296 +ACPI_MODULE_NAME("acpi_processor")
297 +
298 +static int processor_extcntl_parse_csd(struct acpi_processor *pr);
299 +static int processor_extcntl_get_performance(struct acpi_processor *pr);
300 +/*
301 + * External processor control logic may register its own set of ops
302 + * to receive ACPI related notifications. A VMM is one example.
303 + */
304 +const struct processor_extcntl_ops *processor_extcntl_ops;
305 +EXPORT_SYMBOL(processor_extcntl_ops);
306 +
307 +static int processor_notify_smm(void)
308 +{
309 +       acpi_status status;
310 +       static int is_done = 0;
311 +
312 +       /* only need to notify the BIOS successfully once */
313 +       /* avoid double notification, which may lead to unexpected results */
314 +       if (is_done)
315 +               return 0;
316 +
317 +       /* Can't write pstate_cnt to smi_cmd if either value is zero */
318 +       if ((!acpi_fadt.smi_cmd) || (!acpi_fadt.pstate_cnt)) {
319 +               ACPI_DEBUG_PRINT((ACPI_DB_INFO,"No SMI port or pstate_cnt\n"));
320 +               return 0;
321 +       }
322 +
323 +       ACPI_DEBUG_PRINT((ACPI_DB_INFO,
324 +               "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n",
325 +               acpi_fadt.pstate_cnt, acpi_fadt.smi_cmd));
326 +
327 +       /* FADT v1 doesn't support pstate_cnt, but many BIOS vendors use
328 +        * it anyway, so we need to support it... */
329 +       if (acpi_fadt_is_v1) {
330 +               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
331 +                       "Using v1.0 FADT reserved value for pstate_cnt\n"));
332 +       }
333 +
334 +       status = acpi_os_write_port(acpi_fadt.smi_cmd,
335 +                                   (u32) acpi_fadt.pstate_cnt, 8);
336 +       if (ACPI_FAILURE(status))
337 +               return status;
338 +
339 +       is_done = 1;
340 +
341 +       return 0;
342 +}
343 +
344 +int processor_notify_external(struct acpi_processor *pr, int event, int type)
345 +{
346 +       int ret = -EINVAL;
347 +
348 +       if (!processor_cntl_external())
349 +               return -EINVAL;
350 +
351 +       switch (event) {
352 +       case PROCESSOR_PM_INIT:
353 +       case PROCESSOR_PM_CHANGE:
354 +               if ((type >= PM_TYPE_MAX) ||
355 +                       !processor_extcntl_ops->pm_ops[type])
356 +                       break;
357 +
358 +               ret = processor_extcntl_ops->pm_ops[type](pr, event);
359 +               break;
360 +       case PROCESSOR_HOTPLUG:
361 +               if (processor_extcntl_ops->hotplug)
362 +                       ret = processor_extcntl_ops->hotplug(pr, type);
363 +               break;
364 +       default:
365 +               printk(KERN_ERR "Unsupport processor events %d.\n", event);
366 +               break;
367 +       }
368 +
369 +       return ret;
370 +}
371 +
372 +/*
373 + * External control logic can decide to take over all or only part of
374 + * the physical processor control bits. Take a VMM for example:
375 + * physical processors are owned by the VMM, so existence information
376 + * such as hotplug must always be reported to it, and processor idle
377 + * states are likewise necessarily under VMM control. For other
378 + * control bits like performance/throttle states, the VMM may decide
379 + * whether to take control based on its own policy.
380 + */
381 +void processor_extcntl_init(void)
382 +{
383 +       if (!processor_extcntl_ops)
384 +               arch_acpi_processor_init_extcntl(&processor_extcntl_ops);
385 +}
386 +
387 +/*
388 + * This is called from ACPI processor init and holds the housekeeping
389 + * needed to satisfy the external control model. For example, the
390 + * dependency parsing stubs for idle and performance states live here,
391 + * since that information may not be available once split off from the
392 + * dom0 control logic such as the cpufreq driver.
393 + */
394 +int processor_extcntl_prepare(struct acpi_processor *pr)
395 +{
396 +       /* parse cstate dependency information */
397 +       if (processor_pm_external())
398 +               processor_extcntl_parse_csd(pr);
399 +
400 +       /* Initialize performance states */
401 +       if (processor_pmperf_external())
402 +               processor_extcntl_get_performance(pr);
403 +
404 +       return 0;
405 +}
406 +
407 +/*
408 + * Currently no _CSD is implemented, which is why the existing ACPI
409 + * code doesn't parse _CSD at all. To keep the interface to external
410 + * control logic complete, we put a placeholder here for future
411 + * compatibility.
412 + */
413 +static int processor_extcntl_parse_csd(struct acpi_processor *pr)
414 +{
415 +       int i;
416 +
417 +       for (i = 0; i < pr->power.count; i++) {
418 +               if (!pr->power.states[i].valid)
419 +                       continue;
420 +
421 +               /* No dependency by default */
422 +               pr->power.states[i].domain_info = NULL;
423 +               pr->power.states[i].csd_count = 0;
424 +       }
425 +
426 +       return 0;
427 +}
428 +
429 +/*
430 + * Existing ACPI module does parse performance states at some point,
431 + * when acpi-cpufreq driver is loaded which however is something
432 + * we'd like to disable to avoid confliction with external control
433 + * logic. So we have to collect raw performance information here
434 + * when ACPI processor object is found and started.
435 + */
436 +static int processor_extcntl_get_performance(struct acpi_processor *pr)
437 +{
438 +       int ret;
439 +       struct acpi_processor_performance *perf;
440 +       struct acpi_psd_package *pdomain;
441 +
442 +       if (pr->performance)
443 +               return -EBUSY;
444 +
445 +       perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL);
446 +       if (!perf)
447 +               return -ENOMEM;
448 +
449 +       pr->performance = perf;
450 +       /* Get basic performance state information */
451 +       ret = acpi_processor_get_performance_info(pr);
452 +       if (ret < 0)
453 +               goto err_out;
454 +
455 +       /*
456 +        * Here we need to retrieve performance dependency information
457 +        * from the _PSD object. The existing interface is not used
458 +        * because it sticks to Linux cpu ids to construct its bitmaps,
459 +        * whereas we want to decouple ACPI processor objects from the
460 +        * Linux cpu id logic. For example, even when Linux is configured
461 +        * as UP, we still want to report all ACPI processor objects to
462 +        * the external logic. In this case it is preferable to use the
463 +        * ACPI ID instead.
464 +        */
465 +       pdomain = &pr->performance->domain_info;
466 +       pdomain->num_processors = 0;
467 +       ret = acpi_processor_get_psd(pr);
468 +       if (ret < 0) {
469 +               /*
470 +                * _PSD is optional - assume no coordination if absent (or
471 +                * broken), matching native kernels' behavior.
472 +                */
473 +               pdomain->num_entries = ACPI_PSD_REV0_ENTRIES;
474 +               pdomain->revision = ACPI_PSD_REV0_REVISION;
475 +               pdomain->domain = pr->acpi_id;
476 +               pdomain->coord_type = DOMAIN_COORD_TYPE_SW_ALL;
477 +               pdomain->num_processors = 1;
478 +       }
479 +
480 +       /* Some sanity check */
481 +       if ((pdomain->revision != ACPI_PSD_REV0_REVISION) ||
482 +           (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) ||
483 +           ((pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL) &&
484 +            (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY) &&
485 +            (pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL))) {
486 +               ret = -EINVAL;
487 +               goto err_out;
488 +       }
489 +
490 +       /* Last step is to notify BIOS that external logic exists */
491 +       processor_notify_smm();
492 +
493 +       processor_notify_external(pr, PROCESSOR_PM_INIT, PM_TYPE_PERF);
494 +
495 +       return 0;
496 +err_out:
497 +       pr->performance = NULL;
498 +       kfree(perf);
499 +       return ret;
500 +}
501 --- head-2011-02-08.orig/drivers/acpi/processor_idle.c  2011-01-31 12:42:04.000000000 +0100
502 +++ head-2011-02-08/drivers/acpi/processor_idle.c       2011-01-31 14:53:38.000000000 +0100
503 @@ -458,7 +458,8 @@ static int acpi_processor_get_power_info
504                                  */
505                                 cx.entry_method = ACPI_CSTATE_HALT;
506                                 snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
507 -                       } else {
508 +                       /* This doesn't apply to the external control case */
509 +                       } else if (!processor_pm_external()) {
510                                 continue;
511                         }
512                         if (cx.type == ACPI_STATE_C1 &&
513 @@ -497,6 +498,12 @@ static int acpi_processor_get_power_info
514  
515                 cx.power = obj->integer.value;
516  
517 +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
518 +               /* cache control methods to notify external logic */
519 +               if (processor_pm_external())
520 +                       memcpy(&cx.reg, reg, sizeof(*reg));
521 +#endif
522 +
523                 current_count++;
524                 memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));
525  
526 @@ -1130,6 +1137,11 @@ int __cpuinit acpi_processor_power_init(
527                 if (cpuidle_register_device(&pr->power.dev))
528                         return -EIO;
529         }
530 +
531 +       if (processor_pm_external())
532 +               processor_notify_external(pr,
533 +                       PROCESSOR_PM_INIT, PM_TYPE_IDLE);
534 +
535         return 0;
536  }
537  
538 --- head-2011-02-08.orig/drivers/acpi/processor_perflib.c       2010-10-20 22:30:22.000000000 +0200
539 +++ head-2011-02-08/drivers/acpi/processor_perflib.c    2011-01-31 14:53:38.000000000 +0100
540 @@ -79,6 +79,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the fre
541  
542  static int acpi_processor_ppc_status;
543  
544 +#ifdef CONFIG_CPU_FREQ
545  static int acpi_processor_ppc_notifier(struct notifier_block *nb,
546                                        unsigned long event, void *data)
547  {
548 @@ -121,6 +122,7 @@ static int acpi_processor_ppc_notifier(s
549  static struct notifier_block acpi_ppc_notifier_block = {
550         .notifier_call = acpi_processor_ppc_notifier,
551  };
552 +#endif /* CONFIG_CPU_FREQ */
553  
554  static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
555  {
556 @@ -209,7 +211,12 @@ int acpi_processor_ppc_has_changed(struc
557         if (ret < 0)
558                 return (ret);
559         else
560 +#ifdef CONFIG_CPU_FREQ
561                 return cpufreq_update_policy(pr->id);
562 +#elif defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
563 +               return processor_notify_external(pr,
564 +                               PROCESSOR_PM_CHANGE, PM_TYPE_PERF);
565 +#endif
566  }
567  
568  int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
569 @@ -225,6 +232,7 @@ int acpi_processor_get_bios_limit(int cp
570  }
571  EXPORT_SYMBOL(acpi_processor_get_bios_limit);
572  
573 +#ifdef CONFIG_CPU_FREQ
574  void acpi_processor_ppc_init(void)
575  {
576         if (!cpufreq_register_notifier
577 @@ -243,6 +251,7 @@ void acpi_processor_ppc_exit(void)
578  
579         acpi_processor_ppc_status &= ~PPC_REGISTERED;
580  }
581 +#endif /* CONFIG_CPU_FREQ */
582  
583  static int acpi_processor_get_performance_control(struct acpi_processor *pr)
584  {
585 @@ -390,7 +399,10 @@ static int acpi_processor_get_performanc
586         return result;
587  }
588  
589 -static int acpi_processor_get_performance_info(struct acpi_processor *pr)
590 +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
591 +static
592 +#endif
593 +int acpi_processor_get_performance_info(struct acpi_processor *pr)
594  {
595         int result = 0;
596         acpi_status status = AE_OK;
597 @@ -435,6 +447,7 @@ static int acpi_processor_get_performanc
598         return result;
599  }
600  
601 +#ifdef CONFIG_CPU_FREQ
602  int acpi_processor_notify_smm(struct module *calling_module)
603  {
604         acpi_status status;
605 @@ -495,8 +508,12 @@ int acpi_processor_notify_smm(struct mod
606  }
607  
608  EXPORT_SYMBOL(acpi_processor_notify_smm);
609 +#endif /* CONFIG_CPU_FREQ */
610  
611 -static int acpi_processor_get_psd(struct acpi_processor        *pr)
612 +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
613 +static
614 +#endif
615 +int acpi_processor_get_psd(struct acpi_processor *pr)
616  {
617         int result = 0;
618         acpi_status status = AE_OK;
619 --- head-2011-02-08.orig/drivers/acpi/sleep.c   2011-01-31 12:42:53.000000000 +0100
620 +++ head-2011-02-08/drivers/acpi/sleep.c        2011-01-31 14:53:38.000000000 +0100
621 @@ -60,6 +60,7 @@ static struct notifier_block tts_notifie
622  static int acpi_sleep_prepare(u32 acpi_state)
623  {
624  #ifdef CONFIG_ACPI_SLEEP
625 +#ifndef CONFIG_ACPI_PV_SLEEP
626         /* do we have a wakeup address for S2 and S3? */
627         if (acpi_state == ACPI_STATE_S3) {
628                 if (!acpi_wakeup_address) {
629 @@ -69,6 +70,7 @@ static int acpi_sleep_prepare(u32 acpi_s
630                                 (acpi_physical_address)acpi_wakeup_address);
631  
632         }
633 +#endif
634         ACPI_FLUSH_CPU_CACHE();
635  #endif
636         printk(KERN_INFO PREFIX "Preparing to enter system sleep state S%d\n",
637 @@ -264,7 +266,14 @@ static int acpi_suspend_enter(suspend_st
638                 break;
639  
640         case ACPI_STATE_S3:
641 +#ifdef CONFIG_ACPI_PV_SLEEP
642 +               /* The hypervisor will save and restore the CPU context,
643 +                * so we can skip the low level housekeeping here.
644 +                */
645 +               acpi_enter_sleep_state(acpi_state);
646 +#else
647                 do_suspend_lowlevel();
648 +#endif
649                 break;
650         }
651  
652 --- head-2011-02-08.orig/drivers/char/agp/intel-gtt.c   2011-02-08 09:51:53.000000000 +0100
653 +++ head-2011-02-08/drivers/char/agp/intel-gtt.c        2011-02-08 10:02:22.000000000 +0100
654 @@ -150,6 +150,13 @@ static struct page *i8xx_alloc_pages(voi
655         if (page == NULL)
656                 return NULL;
657  
658 +#ifdef CONFIG_XEN
659 +       if (xen_create_contiguous_region((unsigned long)page_address(page), 2, 32)) {
660 +               __free_pages(page, 2);
661 +               return NULL;
662 +       }
663 +#endif
664 +
665         if (set_pages_uc(page, 4) < 0) {
666                 set_pages_wb(page, 4);
667                 __free_pages(page, 2);
668 @@ -166,6 +173,9 @@ static void i8xx_destroy_pages(struct pa
669                 return;
670  
671         set_pages_wb(page, 4);
672 +#ifdef CONFIG_XEN
673 +       xen_destroy_contiguous_region((unsigned long)page_address(page), 2);
674 +#endif
675         put_page(page);
676         __free_pages(page, 2);
677         atomic_dec(&agp_bridge->current_memory_agp);
678 --- head-2011-02-08.orig/drivers/char/mem.c     2011-01-31 12:42:58.000000000 +0100
679 +++ head-2011-02-08/drivers/char/mem.c  2011-01-31 14:53:38.000000000 +0100
680 @@ -89,6 +89,7 @@ void __weak unxlate_dev_mem_ptr(unsigned
681  {
682  }
683  
684 +#ifndef ARCH_HAS_DEV_MEM
685  /*
686   * This funcion reads the *physical* memory. The f_pos points directly to the
687   * memory location.
688 @@ -211,6 +212,7 @@ static ssize_t write_mem(struct file *fi
689         *ppos += written;
690         return written;
691  }
692 +#endif
693  
694  int __weak phys_mem_access_prot_allowed(struct file *file,
695         unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
696 @@ -337,6 +339,9 @@ static int mmap_mem(struct file *file, s
697  static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
698  {
699         unsigned long pfn;
700 +#ifdef CONFIG_XEN
701 +       unsigned long i, count;
702 +#endif
703  
704         /* Turn a kernel-virtual address into a physical page frame */
705         pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
706 @@ -351,6 +356,13 @@ static int mmap_kmem(struct file *file, 
707         if (!pfn_valid(pfn))
708                 return -EIO;
709  
710 +#ifdef CONFIG_XEN
711 +       count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
712 +       for (i = 0; i < count; i++)
713 +               if ((pfn + i) != mfn_to_local_pfn(pfn_to_mfn(pfn + i)))
714 +                       return -EIO;
715 +#endif
716 +
717         vma->vm_pgoff = pfn;
718         return mmap_mem(file, vma);
719  }
720 @@ -845,6 +857,7 @@ static int open_port(struct inode * inod
721  #define open_kmem      open_mem
722  #define open_oldmem    open_mem
723  
724 +#ifndef ARCH_HAS_DEV_MEM
725  static const struct file_operations mem_fops = {
726         .llseek         = memory_lseek,
727         .read           = read_mem,
728 @@ -853,6 +866,9 @@ static const struct file_operations mem_
729         .open           = open_mem,
730         .get_unmapped_area = get_unmapped_area_mem,
731  };
732 +#else
733 +extern const struct file_operations mem_fops;
734 +#endif
735  
736  #ifdef CONFIG_DEVKMEM
737  static const struct file_operations kmem_fops = {
738 --- head-2011-02-08.orig/drivers/char/tpm/Makefile      2006-09-20 05:42:06.000000000 +0200
739 +++ head-2011-02-08/drivers/char/tpm/Makefile   2011-01-31 14:53:38.000000000 +0100
740 @@ -9,3 +9,5 @@ obj-$(CONFIG_TCG_TIS) += tpm_tis.o
741  obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
742  obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
743  obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
744 +obj-$(CONFIG_TCG_XEN) += tpm_xenu.o
745 +tpm_xenu-y = tpm_xen.o tpm_vtpm.o
746 --- head-2011-02-08.orig/drivers/char/tpm/tpm.h 2011-01-31 12:42:05.000000000 +0100
747 +++ head-2011-02-08/drivers/char/tpm/tpm.h      2011-01-31 14:53:38.000000000 +0100
748 @@ -108,6 +108,9 @@ struct tpm_chip {
749         struct dentry **bios_dir;
750  
751         struct list_head list;
752 +#ifdef CONFIG_XEN
753 +       void *priv;
754 +#endif
755         void (*release) (struct device *);
756  };
757  
758 @@ -272,6 +275,18 @@ struct tpm_cmd_t {
759  
760  ssize_t        tpm_getcap(struct device *, __be32, cap_t *, const char *);
761  
762 +#ifdef CONFIG_XEN
763 +static inline void *chip_get_private(const struct tpm_chip *chip)
764 +{
765 +       return chip->priv;
766 +}
767 +
768 +static inline void chip_set_private(struct tpm_chip *chip, void *priv)
769 +{
770 +       chip->priv = priv;
771 +}
772 +#endif
773 +
774  extern void tpm_get_timeouts(struct tpm_chip *);
775  extern void tpm_gen_interrupt(struct tpm_chip *);
776  extern void tpm_continue_selftest(struct tpm_chip *);
777 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
778 +++ head-2011-02-08/drivers/char/tpm/tpm_vtpm.c 2011-01-31 14:53:38.000000000 +0100
779 @@ -0,0 +1,542 @@
780 +/*
781 + * Copyright (C) 2006 IBM Corporation
782 + *
783 + * Authors:
784 + * Stefan Berger <stefanb@us.ibm.com>
785 + *
786 + * Generic device driver part for device drivers in a virtualized
787 + * environment.
788 + *
789 + * This program is free software; you can redistribute it and/or
790 + * modify it under the terms of the GNU General Public License as
791 + * published by the Free Software Foundation, version 2 of the
792 + * License.
793 + *
794 + */
795 +
796 +#include <asm/uaccess.h>
797 +#include <linux/list.h>
798 +#include <linux/device.h>
799 +#include <linux/interrupt.h>
800 +#include <linux/platform_device.h>
801 +#include "tpm.h"
802 +#include "tpm_vtpm.h"
803 +
804 +/* read status bits */
805 +enum {
806 +       STATUS_BUSY = 0x01,
807 +       STATUS_DATA_AVAIL = 0x02,
808 +       STATUS_READY = 0x04
809 +};
810 +
811 +struct transmission {
812 +       struct list_head next;
813 +
814 +       unsigned char *request;
815 +       size_t  request_len;
816 +       size_t  request_buflen;
817 +
818 +       unsigned char *response;
819 +       size_t  response_len;
820 +       size_t  response_buflen;
821 +
822 +       unsigned int flags;
823 +};
824 +
825 +enum {
826 +       TRANSMISSION_FLAG_WAS_QUEUED = 0x1
827 +};
828 +
829 +
830 +enum {
831 +       DATAEX_FLAG_QUEUED_ONLY = 0x1
832 +};
833 +
834 +
835 +/* local variables */
836 +
837 +/* local function prototypes */
838 +static int _vtpm_send_queued(struct tpm_chip *chip);
839 +
840 +
841 +/* =============================================================
842 + * Some utility functions
843 + * =============================================================
844 + */
845 +static void vtpm_state_init(struct vtpm_state *vtpms)
846 +{
847 +       vtpms->current_request = NULL;
848 +       spin_lock_init(&vtpms->req_list_lock);
849 +       init_waitqueue_head(&vtpms->req_wait_queue);
850 +       INIT_LIST_HEAD(&vtpms->queued_requests);
851 +
852 +       vtpms->current_response = NULL;
853 +       spin_lock_init(&vtpms->resp_list_lock);
854 +       init_waitqueue_head(&vtpms->resp_wait_queue);
855 +
856 +       vtpms->disconnect_time = jiffies;
857 +}
858 +
859 +
860 +static inline struct transmission *transmission_alloc(void)
861 +{
862 +       return kzalloc(sizeof(struct transmission), GFP_ATOMIC);
863 +}
864 +
865 +static unsigned char *
866 +transmission_set_req_buffer(struct transmission *t,
867 +                            unsigned char *buffer, size_t len)
868 +{
869 +       if (t->request_buflen < len) {
870 +               kfree(t->request);
871 +               t->request = kmalloc(len, GFP_KERNEL);
872 +               if (!t->request) {
873 +                       t->request_buflen = 0;
874 +                       return NULL;
875 +               }
876 +               t->request_buflen = len;
877 +       }
878 +
879 +       memcpy(t->request, buffer, len);
880 +       t->request_len = len;
881 +
882 +       return t->request;
883 +}
884 +
885 +static unsigned char *
886 +transmission_set_res_buffer(struct transmission *t,
887 +                            const unsigned char *buffer, size_t len)
888 +{
889 +       if (t->response_buflen < len) {
890 +               kfree(t->response);
891 +               t->response = kmalloc(len, GFP_ATOMIC);
892 +               if (!t->response) {
893 +                       t->response_buflen = 0;
894 +                       return NULL;
895 +               }
896 +               t->response_buflen = len;
897 +       }
898 +
899 +       memcpy(t->response, buffer, len);
900 +       t->response_len = len;
901 +
902 +       return t->response;
903 +}
904 +
905 +static inline void transmission_free(struct transmission *t)
906 +{
907 +       kfree(t->request);
908 +       kfree(t->response);
909 +       kfree(t);
910 +}
911 +
912 +/* =============================================================
913 + * Interface with the lower layer driver
914 + * =============================================================
915 + */
916 +/*
917 + * Lower layer uses this function to make a response available.
918 + */
919 +int vtpm_vd_recv(const struct tpm_chip *chip,
920 +                 const unsigned char *buffer, size_t count,
921 +                 void *ptr)
922 +{
923 +       unsigned long flags;
924 +       int ret_size = 0;
925 +       struct transmission *t;
926 +       struct vtpm_state *vtpms;
927 +
928 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
929 +
930 +       /*
931 +        * The request list must contain exactly one request,
932 +        * and that element must be the one that was passed in
933 +        * from the front-end.
934 +        */
935 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
936 +       if (vtpms->current_request != ptr) {
937 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
938 +               return 0;
939 +       }
940 +
941 +       if ((t = vtpms->current_request)) {
942 +               transmission_free(t);
943 +               vtpms->current_request = NULL;
944 +       }
945 +
946 +       t = transmission_alloc();
947 +       if (t) {
948 +               if (!transmission_set_res_buffer(t, buffer, count)) {
949 +                       transmission_free(t);
950 +                       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
951 +                       return -ENOMEM;
952 +               }
953 +               ret_size = count;
954 +               vtpms->current_response = t;
955 +               wake_up_interruptible(&vtpms->resp_wait_queue);
956 +       }
957 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
958 +
959 +       return ret_size;
960 +}
961 +
962 +
963 +/*
964 + * Lower layer indicates its status (connected/disconnected)
965 + */
966 +void vtpm_vd_status(const struct tpm_chip *chip, u8 vd_status)
967 +{
968 +       struct vtpm_state *vtpms;
969 +
970 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
971 +
972 +       vtpms->vd_status = vd_status;
973 +       if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
974 +               vtpms->disconnect_time = jiffies;
975 +       }
976 +}
977 +
978 +/* =============================================================
979 + * Interface with the generic TPM driver
980 + * =============================================================
981 + */
982 +static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
983 +{
984 +       int rc = 0;
985 +       unsigned long flags;
986 +       struct vtpm_state *vtpms;
987 +
988 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
989 +
990 +       /*
991 +        * Check if the previous operation only queued the command.
992 +        * In this case there won't be a response, so just return
993 +        * from here and reset that flag. In any other case a
994 +        * response from the back-end is expected.
995 +        */
996 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
997 +       if ((vtpms->flags & DATAEX_FLAG_QUEUED_ONLY) != 0) {
998 +               vtpms->flags &= ~DATAEX_FLAG_QUEUED_ONLY;
999 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1000 +               /*
1001 +                * The first few commands (measurements) must be
1002 +                * queued since it might not be possible to talk to the
1003 +                * TPM, yet.
1004 +                * Return a response of up to 30 '0's.
1005 +                */
1006 +
1007 +               count = min_t(size_t, count, 30);
1008 +               memset(buf, 0x0, count);
1009 +               return count;
1010 +       }
1011 +       /*
1012 +        * Check whether something is in the response list and, if
1013 +        * there's nothing in the list, wait for something to appear.
1014 +        */
1015 +
1016 +       if (!vtpms->current_response) {
1017 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1018 +               interruptible_sleep_on_timeout(&vtpms->resp_wait_queue,
1019 +                                              1000);
1020 +               spin_lock_irqsave(&vtpms->resp_list_lock ,flags);
1021 +       }
1022 +
1023 +       if (vtpms->current_response) {
1024 +               struct transmission *t = vtpms->current_response;
1025 +               vtpms->current_response = NULL;
1026 +               rc = min(count, t->response_len);
1027 +               memcpy(buf, t->response, rc);
1028 +               transmission_free(t);
1029 +       }
1030 +
1031 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1032 +       return rc;
1033 +}
1034 +
1035 +static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
1036 +{
1037 +       int rc = 0;
1038 +       unsigned long flags;
1039 +       struct transmission *t = transmission_alloc();
1040 +       struct vtpm_state *vtpms;
1041 +
1042 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1043 +
1044 +       if (!t)
1045 +               return -ENOMEM;
1046 +       /*
1047 +        * If there's a current request, it must be the
1048 +        * previous request that has timed out.
1049 +        */
1050 +       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1051 +       if (vtpms->current_request != NULL) {
1052 +               printk("WARNING: Sending although there is a request outstanding.\n"
1053 +                      "         Previous request must have timed out.\n");
1054 +               transmission_free(vtpms->current_request);
1055 +               vtpms->current_request = NULL;
1056 +       }
1057 +       spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1058 +
1059 +       /*
1060 +        * Queue the packet if the driver below is not yet
1061 +        * ready, or if there is already a packet in the
1062 +        * queue.
1063 +        * If the driver below is ready, unqueue all
1064 +        * packets first before sending our current
1065 +        * packet.
1066 +        * For each unqueued packet, except for the
1067 +        * last (=current) packet, call the function
1068 +        * tpm_xen_recv to wait for the response to come
1069 +        * back.
1070 +        */
1071 +       if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
1072 +               if (time_after(jiffies,
1073 +                              vtpms->disconnect_time + HZ * 10)) {
1074 +                       rc = -ENOENT;
1075 +               } else {
1076 +                       goto queue_it;
1077 +               }
1078 +       } else {
1079 +               /*
1080 +                * Send all queued packets.
1081 +                */
1082 +               if (_vtpm_send_queued(chip) == 0) {
1083 +
1084 +                       vtpms->current_request = t;
1085 +
1086 +                       rc = vtpm_vd_send(vtpms->tpm_private,
1087 +                                         buf,
1088 +                                         count,
1089 +                                         t);
1090 +                       /*
1091 +                        * The generic TPM driver will call
1092 +                        * the function to receive the response.
1093 +                        */
1094 +                       if (rc < 0) {
1095 +                               vtpms->current_request = NULL;
1096 +                               goto queue_it;
1097 +                       }
1098 +               } else {
1099 +queue_it:
1100 +                       if (!transmission_set_req_buffer(t, buf, count)) {
1101 +                               transmission_free(t);
1102 +                               rc = -ENOMEM;
1103 +                               goto exit;
1104 +                       }
1105 +                       /*
1106 +                        * An error occurred. Don't even try
1107 +                        * to send the current request. Just
1108 +                        * queue it.
1109 +                        */
1110 +                       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1111 +                       vtpms->flags |= DATAEX_FLAG_QUEUED_ONLY;
1112 +                       list_add_tail(&t->next, &vtpms->queued_requests);
1113 +                       spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1114 +               }
1115 +       }
1116 +
1117 +exit:
1118 +       return rc;
1119 +}
1120 +
1121 +
1122 +/*
1123 + * Send all queued requests.
1124 + */
1125 +static int _vtpm_send_queued(struct tpm_chip *chip)
1126 +{
1127 +       int rc;
1128 +       int error = 0;
1129 +       long flags;
1130 +       unsigned char buffer[1];
1131 +       struct vtpm_state *vtpms;
1132 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1133 +
1134 +       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1135 +
1136 +       while (!list_empty(&vtpms->queued_requests)) {
1137 +               /*
1138 +                * Need to dequeue them.
1139 +                * Read the result into a dummy buffer.
1140 +                */
1141 +               struct transmission *qt = (struct transmission *)
1142 +                                         vtpms->queued_requests.next;
1143 +               list_del(&qt->next);
1144 +               vtpms->current_request = qt;
1145 +               spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1146 +
1147 +               rc = vtpm_vd_send(vtpms->tpm_private,
1148 +                                 qt->request,
1149 +                                 qt->request_len,
1150 +                                 qt);
1151 +
1152 +               if (rc < 0) {
1153 +                       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1154 +                       if ((qt = vtpms->current_request) != NULL) {
1155 +                               /*
1156 +                                * requeue it at the beginning
1157 +                                * of the list
1158 +                                */
1159 +                               list_add(&qt->next,
1160 +                                        &vtpms->queued_requests);
1161 +                       }
1162 +                       vtpms->current_request = NULL;
1163 +                       error = 1;
1164 +                       break;
1165 +               }
1166 +               /*
1167 +                * After this point qt is not valid anymore!
1168 +                * It is freed when the front-end delivers the
1169 +                * data by calling tpm_recv.
1170 +                */
1171 +               /*
1172 +                * Receive response into provided dummy buffer
1173 +                */
1174 +               rc = vtpm_recv(chip, buffer, sizeof(buffer));
1175 +               spin_lock_irqsave(&vtpms->req_list_lock, flags);
1176 +       }
1177 +
1178 +       spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1179 +
1180 +       return error;
1181 +}
1182 +
1183 +static void vtpm_cancel(struct tpm_chip *chip)
1184 +{
1185 +       unsigned long flags;
1186 +       struct vtpm_state *vtpms = (struct vtpm_state *)chip_get_private(chip);
1187 +
1188 +       spin_lock_irqsave(&vtpms->resp_list_lock,flags);
1189 +
1190 +       if (!vtpms->current_response && vtpms->current_request) {
1191 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1192 +               interruptible_sleep_on(&vtpms->resp_wait_queue);
1193 +               spin_lock_irqsave(&vtpms->resp_list_lock,flags);
1194 +       }
1195 +
1196 +       if (vtpms->current_response) {
1197 +               struct transmission *t = vtpms->current_response;
1198 +               vtpms->current_response = NULL;
1199 +               transmission_free(t);
1200 +       }
1201 +
1202 +       spin_unlock_irqrestore(&vtpms->resp_list_lock,flags);
1203 +}
1204 +
1205 +static u8 vtpm_status(struct tpm_chip *chip)
1206 +{
1207 +       u8 rc = 0;
1208 +       unsigned long flags;
1209 +       struct vtpm_state *vtpms;
1210 +
1211 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1212 +
1213 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1214 +       /*
1215 +        * Data are available if:
1216 +        *  - there's a current response
1217 +        *  - the last packet was queued only (this is fake, but necessary to
1218 +        *      get the generic TPM layer to call the receive function.)
1219 +        */
1220 +       if (vtpms->current_response ||
1221 +           0 != (vtpms->flags & DATAEX_FLAG_QUEUED_ONLY)) {
1222 +               rc = STATUS_DATA_AVAIL;
1223 +       } else if (!vtpms->current_response && !vtpms->current_request) {
1224 +               rc = STATUS_READY;
1225 +       }
1226 +
1227 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1228 +       return rc;
1229 +}
1230 +
1231 +static struct file_operations vtpm_ops = {
1232 +       .owner = THIS_MODULE,
1233 +       .llseek = no_llseek,
1234 +       .open = tpm_open,
1235 +       .read = tpm_read,
1236 +       .write = tpm_write,
1237 +       .release = tpm_release,
1238 +};
1239 +
1240 +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
1241 +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
1242 +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
1243 +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
1244 +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
1245 +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
1246 +                  NULL);
1247 +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
1248 +static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel);
1249 +
1250 +static struct attribute *vtpm_attrs[] = {
1251 +       &dev_attr_pubek.attr,
1252 +       &dev_attr_pcrs.attr,
1253 +       &dev_attr_enabled.attr,
1254 +       &dev_attr_active.attr,
1255 +       &dev_attr_owned.attr,
1256 +       &dev_attr_temp_deactivated.attr,
1257 +       &dev_attr_caps.attr,
1258 +       &dev_attr_cancel.attr,
1259 +       NULL,
1260 +};
1261 +
1262 +static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs };
1263 +
1264 +#define TPM_LONG_TIMEOUT   (10 * 60 * HZ)
1265 +
1266 +static struct tpm_vendor_specific tpm_vtpm = {
1267 +       .recv = vtpm_recv,
1268 +       .send = vtpm_send,
1269 +       .cancel = vtpm_cancel,
1270 +       .status = vtpm_status,
1271 +       .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL,
1272 +       .req_complete_val  = STATUS_DATA_AVAIL,
1273 +       .req_canceled = STATUS_READY,
1274 +       .attr_group = &vtpm_attr_grp,
1275 +       .miscdev = {
1276 +               .fops = &vtpm_ops,
1277 +       },
1278 +       .duration = {
1279 +               TPM_LONG_TIMEOUT,
1280 +               TPM_LONG_TIMEOUT,
1281 +               TPM_LONG_TIMEOUT,
1282 +       },
1283 +};
1284 +
1285 +struct tpm_chip *init_vtpm(struct device *dev,
1286 +                           struct tpm_private *tp)
1287 +{
1288 +       long rc;
1289 +       struct tpm_chip *chip;
1290 +       struct vtpm_state *vtpms;
1291 +
1292 +       vtpms = kzalloc(sizeof(struct vtpm_state), GFP_KERNEL);
1293 +       if (!vtpms)
1294 +               return ERR_PTR(-ENOMEM);
1295 +
1296 +       vtpm_state_init(vtpms);
1297 +       vtpms->tpm_private = tp;
1298 +
1299 +       chip = tpm_register_hardware(dev, &tpm_vtpm);
1300 +       if (!chip) {
1301 +               rc = -ENODEV;
1302 +               goto err_free_mem;
1303 +       }
1304 +
1305 +       chip_set_private(chip, vtpms);
1306 +
1307 +       return chip;
1308 +
1309 +err_free_mem:
1310 +       kfree(vtpms);
1311 +
1312 +       return ERR_PTR(rc);
1313 +}
1314 +
1315 +void cleanup_vtpm(struct device *dev)
1316 +{
1317 +       struct tpm_chip *chip = dev_get_drvdata(dev);
1318 +       struct vtpm_state *vtpms = (struct vtpm_state*)chip_get_private(chip);
1319 +       tpm_remove_hardware(dev);
1320 +       kfree(vtpms);
1321 +}
1322 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1323 +++ head-2011-02-08/drivers/char/tpm/tpm_vtpm.h 2011-01-31 14:53:38.000000000 +0100
1324 @@ -0,0 +1,55 @@
1325 +#ifndef TPM_VTPM_H
1326 +#define TPM_VTPM_H
1327 +
1328 +struct tpm_chip;
1329 +struct tpm_private;
1330 +
1331 +struct vtpm_state {
1332 +       struct transmission *current_request;
1333 +       spinlock_t           req_list_lock;
1334 +       wait_queue_head_t    req_wait_queue;
1335 +
1336 +       struct list_head     queued_requests;
1337 +
1338 +       struct transmission *current_response;
1339 +       spinlock_t           resp_list_lock;
1340 +       wait_queue_head_t    resp_wait_queue;     // processes waiting for responses
1341 +
1342 +       u8                   vd_status;
1343 +       u8                   flags;
1344 +
1345 +       unsigned long        disconnect_time;
1346 +
1347 +       /*
1348 +        * The following is a private structure of the underlying
1349 +        * driver. It is passed as a parameter to the send function.
1350 +        */
1351 +       struct tpm_private *tpm_private;
1352 +};
1353 +
1354 +
1355 +enum vdev_status {
1356 +       TPM_VD_STATUS_DISCONNECTED = 0x0,
1357 +       TPM_VD_STATUS_CONNECTED = 0x1
1358 +};
1359 +
1360 +/* this function is called from tpm_vtpm.c */
1361 +int vtpm_vd_send(struct tpm_private * tp,
1362 +                 const u8 * buf, size_t count, void *ptr);
1363 +
1364 +/* these functions are offered by tpm_vtpm.c */
1365 +struct tpm_chip *init_vtpm(struct device *,
1366 +                           struct tpm_private *);
1367 +void cleanup_vtpm(struct device *);
1368 +int vtpm_vd_recv(const struct tpm_chip* chip,
1369 +                 const unsigned char *buffer, size_t count, void *ptr);
1370 +void vtpm_vd_status(const struct tpm_chip *, u8 status);
1371 +
1372 +static inline struct tpm_private *tpm_private_from_dev(struct device *dev)
1373 +{
1374 +       struct tpm_chip *chip = dev_get_drvdata(dev);
1375 +       struct vtpm_state *vtpms = chip_get_private(chip);
1376 +       return vtpms->tpm_private;
1377 +}
1378 +
1379 +#endif
1380 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1381 +++ head-2011-02-08/drivers/char/tpm/tpm_xen.c  2011-01-31 14:53:38.000000000 +0100
1382 @@ -0,0 +1,722 @@
1383 +/*
1384 + * Copyright (c) 2005, IBM Corporation
1385 + *
1386 + * Author: Stefan Berger, stefanb@us.ibm.com
1387 + * Grant table support: Mahadevan Gomathisankaran
1388 + *
1389 + * This code has been derived from drivers/xen/netfront/netfront.c
1390 + *
1391 + * Copyright (c) 2002-2004, K A Fraser
1392 + *
1393 + * This program is free software; you can redistribute it and/or
1394 + * modify it under the terms of the GNU General Public License version 2
1395 + * as published by the Free Software Foundation; or, when distributed
1396 + * separately from the Linux kernel or incorporated into other
1397 + * software packages, subject to the following license:
1398 + *
1399 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1400 + * of this source file (the "Software"), to deal in the Software without
1401 + * restriction, including without limitation the rights to use, copy, modify,
1402 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1403 + * and to permit persons to whom the Software is furnished to do so, subject to
1404 + * the following conditions:
1405 + *
1406 + * The above copyright notice and this permission notice shall be included in
1407 + * all copies or substantial portions of the Software.
1408 + *
1409 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1410 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1411 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1412 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1413 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1414 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1415 + * IN THE SOFTWARE.
1416 + */
1417 +
1418 +#include <linux/errno.h>
1419 +#include <linux/err.h>
1420 +#include <linux/interrupt.h>
1421 +#include <linux/mutex.h>
1422 +#include <asm/uaccess.h>
1423 +#include <xen/evtchn.h>
1424 +#include <xen/interface/grant_table.h>
1425 +#include <xen/interface/io/tpmif.h>
1426 +#include <xen/gnttab.h>
1427 +#include <xen/xenbus.h>
1428 +#include "tpm.h"
1429 +#include "tpm_vtpm.h"
1430 +
1431 +#undef DEBUG
1432 +
1433 +/* local structures */
1434 +struct tpm_private {
1435 +       struct tpm_chip *chip;
1436 +
1437 +       tpmif_tx_interface_t *tx;
1438 +       atomic_t refcnt;
1439 +       unsigned int irq;
1440 +       u8 is_connected;
1441 +       u8 is_suspended;
1442 +
1443 +       spinlock_t tx_lock;
1444 +
1445 +       struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE];
1446 +
1447 +       atomic_t tx_busy;
1448 +       void *tx_remember;
1449 +
1450 +       domid_t backend_id;
1451 +       wait_queue_head_t wait_q;
1452 +
1453 +       struct xenbus_device *dev;
1454 +       int ring_ref;
1455 +};
1456 +
1457 +struct tx_buffer {
1458 +       unsigned int size;      // available space in data
1459 +       unsigned int len;       // used space in data
1460 +       unsigned char *data;    // pointer to a page
1461 +};
1462 +
1463 +
1464 +/* locally visible variables */
1465 +static grant_ref_t gref_head;
1466 +static struct tpm_private *my_priv;
1467 +
1468 +/* local function prototypes */
1469 +static irqreturn_t tpmif_int(int irq,
1470 +                             void *tpm_priv,
1471 +                             struct pt_regs *ptregs);
1472 +static void tpmif_rx_action(unsigned long unused);
1473 +static int tpmif_connect(struct xenbus_device *dev,
1474 +                         struct tpm_private *tp,
1475 +                         domid_t domid);
1476 +static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0);
1477 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp);
1478 +static void tpmif_free_tx_buffers(struct tpm_private *tp);
1479 +static void tpmif_set_connected_state(struct tpm_private *tp,
1480 +                                      u8 newstate);
1481 +static int tpm_xmit(struct tpm_private *tp,
1482 +                    const u8 * buf, size_t count, int userbuffer,
1483 +                    void *remember);
1484 +static void destroy_tpmring(struct tpm_private *tp);
1485 +void __exit tpmif_exit(void);
1486 +
1487 +#define DPRINTK(fmt, args...) \
1488 +    pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
1489 +#define IPRINTK(fmt, args...) \
1490 +    printk(KERN_INFO "xen_tpm_fr: " fmt, ##args)
1491 +#define WPRINTK(fmt, args...) \
1492 +    printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args)
1493 +
1494 +#define GRANT_INVALID_REF      0
1495 +
1496 +
1497 +static inline int
1498 +tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len,
1499 +               int isuserbuffer)
1500 +{
1501 +       int copied = len;
1502 +
1503 +       if (len > txb->size)
1504 +               copied = txb->size;
1505 +       if (isuserbuffer) {
1506 +               if (copy_from_user(txb->data, src, copied))
1507 +                       return -EFAULT;
1508 +       } else {
1509 +               memcpy(txb->data, src, copied);
1510 +       }
1511 +       txb->len = len;
1512 +       return copied;
1513 +}
1514 +
1515 +static inline struct tx_buffer *tx_buffer_alloc(void)
1516 +{
1517 +       struct tx_buffer *txb;
1518 +
1519 +       txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL);
1520 +       if (!txb)
1521 +               return NULL;
1522 +
1523 +       txb->len = 0;
1524 +       txb->size = PAGE_SIZE;
1525 +       txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
1526 +       if (txb->data == NULL) {
1527 +               kfree(txb);
1528 +               txb = NULL;
1529 +       }
1530 +
1531 +       return txb;
1532 +}
1533 +
1534 +
1535 +static inline void tx_buffer_free(struct tx_buffer *txb)
1536 +{
1537 +       if (txb) {
1538 +               free_page((long)txb->data);
1539 +               kfree(txb);
1540 +       }
1541 +}
1542 +
1543 +/**************************************************************
1544 + Utility functions for the tpm_private structure
1545 +**************************************************************/
1546 +static void tpm_private_init(struct tpm_private *tp)
1547 +{
1548 +       spin_lock_init(&tp->tx_lock);
1549 +       init_waitqueue_head(&tp->wait_q);
1550 +       atomic_set(&tp->refcnt, 1);
1551 +}
1552 +
1553 +static void tpm_private_put(void)
1554 +{
1555 +       if (!atomic_dec_and_test(&my_priv->refcnt))
1556 +               return;
1557 +
1558 +       tpmif_free_tx_buffers(my_priv);
1559 +       kfree(my_priv);
1560 +       my_priv = NULL;
1561 +}
1562 +
1563 +static struct tpm_private *tpm_private_get(void)
1564 +{
1565 +       int err;
1566 +
1567 +       if (my_priv) {
1568 +               atomic_inc(&my_priv->refcnt);
1569 +               return my_priv;
1570 +       }
1571 +
1572 +       my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
1573 +       if (!my_priv)
1574 +               return NULL;
1575 +
1576 +       tpm_private_init(my_priv);
1577 +       err = tpmif_allocate_tx_buffers(my_priv);
1578 +       if (err < 0)
1579 +               tpm_private_put();
1580 +
1581 +       return my_priv;
1582 +}
1583 +
1584 +/**************************************************************
1585 +
1586 + The interface to let the tpm plugin register its callback
1587 + function and send data to another partition using this module
1588 +
1589 +**************************************************************/
1590 +
1591 +static DEFINE_MUTEX(suspend_lock);
1592 +/*
1593 + * Send data via this module by calling this function
1594 + */
1595 +int vtpm_vd_send(struct tpm_private *tp,
1596 +                 const u8 * buf, size_t count, void *ptr)
1597 +{
1598 +       int sent;
1599 +
1600 +       mutex_lock(&suspend_lock);
1601 +       sent = tpm_xmit(tp, buf, count, 0, ptr);
1602 +       mutex_unlock(&suspend_lock);
1603 +
1604 +       return sent;
1605 +}
1606 +
1607 +/**************************************************************
1608 + XENBUS support code
1609 +**************************************************************/
1610 +
1611 +static int setup_tpmring(struct xenbus_device *dev,
1612 +                         struct tpm_private *tp)
1613 +{
1614 +       tpmif_tx_interface_t *sring;
1615 +       int err;
1616 +
1617 +       tp->ring_ref = GRANT_INVALID_REF;
1618 +
1619 +       sring = (void *)__get_free_page(GFP_KERNEL);
1620 +       if (!sring) {
1621 +               xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
1622 +               return -ENOMEM;
1623 +       }
1624 +       tp->tx = sring;
1625 +
1626 +       err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx));
1627 +       if (err < 0) {
1628 +               free_page((unsigned long)sring);
1629 +               tp->tx = NULL;
1630 +               xenbus_dev_fatal(dev, err, "allocating grant reference");
1631 +               goto fail;
1632 +       }
1633 +       tp->ring_ref = err;
1634 +
1635 +       err = tpmif_connect(dev, tp, dev->otherend_id);
1636 +       if (err)
1637 +               goto fail;
1638 +
1639 +       return 0;
1640 +fail:
1641 +       destroy_tpmring(tp);
1642 +       return err;
1643 +}
1644 +
1645 +
1646 +static void destroy_tpmring(struct tpm_private *tp)
1647 +{
1648 +       tpmif_set_connected_state(tp, 0);
1649 +
1650 +       if (tp->ring_ref != GRANT_INVALID_REF) {
1651 +               gnttab_end_foreign_access(tp->ring_ref, (unsigned long)tp->tx);
1652 +               tp->ring_ref = GRANT_INVALID_REF;
1653 +               tp->tx = NULL;
1654 +       }
1655 +
1656 +       if (tp->irq)
1657 +               unbind_from_irqhandler(tp->irq, tp);
1658 +
1659 +       tp->irq = 0;
1660 +}
1661 +
1662 +
1663 +static int talk_to_backend(struct xenbus_device *dev,
1664 +                           struct tpm_private *tp)
1665 +{
1666 +       const char *message = NULL;
1667 +       int err;
1668 +       struct xenbus_transaction xbt;
1669 +
1670 +       err = setup_tpmring(dev, tp);
1671 +       if (err) {
1672 +               xenbus_dev_fatal(dev, err, "setting up ring");
1673 +               goto out;
1674 +       }
1675 +
1676 +again:
1677 +       err = xenbus_transaction_start(&xbt);
1678 +       if (err) {
1679 +               xenbus_dev_fatal(dev, err, "starting transaction");
1680 +               goto destroy_tpmring;
1681 +       }
1682 +
1683 +       err = xenbus_printf(xbt, dev->nodename,
1684 +                           "ring-ref","%u", tp->ring_ref);
1685 +       if (err) {
1686 +               message = "writing ring-ref";
1687 +               goto abort_transaction;
1688 +       }
1689 +
1690 +       err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
1691 +                           irq_to_evtchn_port(tp->irq));
1692 +       if (err) {
1693 +               message = "writing event-channel";
1694 +               goto abort_transaction;
1695 +       }
1696 +
1697 +       err = xenbus_transaction_end(xbt, 0);
1698 +       if (err == -EAGAIN)
1699 +               goto again;
1700 +       if (err) {
1701 +               xenbus_dev_fatal(dev, err, "completing transaction");
1702 +               goto destroy_tpmring;
1703 +       }
1704 +
1705 +       xenbus_switch_state(dev, XenbusStateConnected);
1706 +
1707 +       return 0;
1708 +
1709 +abort_transaction:
1710 +       xenbus_transaction_end(xbt, 1);
1711 +       if (message)
1712 +               xenbus_dev_error(dev, err, "%s", message);
1713 +destroy_tpmring:
1714 +       destroy_tpmring(tp);
1715 +out:
1716 +       return err;
1717 +}
1718 +
1719 +/**
1720 + * Callback received when the backend's state changes.
1721 + */
1722 +static void backend_changed(struct xenbus_device *dev,
1723 +                           enum xenbus_state backend_state)
1724 +{
1725 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1726 +       DPRINTK("\n");
1727 +
1728 +       switch (backend_state) {
1729 +       case XenbusStateInitialising:
1730 +       case XenbusStateInitWait:
1731 +       case XenbusStateInitialised:
1732 +       case XenbusStateReconfiguring:
1733 +       case XenbusStateReconfigured:
1734 +       case XenbusStateUnknown:
1735 +               break;
1736 +
1737 +       case XenbusStateConnected:
1738 +               tpmif_set_connected_state(tp, 1);
1739 +               break;
1740 +
1741 +       case XenbusStateClosing:
1742 +               tpmif_set_connected_state(tp, 0);
1743 +               xenbus_frontend_closed(dev);
1744 +               break;
1745 +
1746 +       case XenbusStateClosed:
1747 +               tpmif_set_connected_state(tp, 0);
1748 +               if (tp->is_suspended == 0)
1749 +                       device_unregister(&dev->dev);
1750 +               xenbus_frontend_closed(dev);
1751 +               break;
1752 +       }
1753 +}
1754 +
1755 +static int tpmfront_probe(struct xenbus_device *dev,
1756 +                          const struct xenbus_device_id *id)
1757 +{
1758 +       int err;
1759 +       int handle;
1760 +       struct tpm_private *tp = tpm_private_get();
1761 +
1762 +       if (!tp)
1763 +               return -ENOMEM;
1764 +
1765 +       tp->chip = init_vtpm(&dev->dev, tp);
1766 +       if (IS_ERR(tp->chip))
1767 +               return PTR_ERR(tp->chip);
1768 +
1769 +       err = xenbus_scanf(XBT_NIL, dev->nodename,
1770 +                          "handle", "%i", &handle);
1771 +       if (XENBUS_EXIST_ERR(err))
1772 +               return err;
1773 +
1774 +       if (err < 0) {
1775 +               xenbus_dev_fatal(dev, err, "reading handle");
1776 +               return err;
1777 +       }
1778 +
1779 +       tp->dev = dev;
1780 +
1781 +       err = talk_to_backend(dev, tp);
1782 +       if (err) {
1783 +               tpm_private_put();
1784 +               return err;
1785 +       }
1786 +
1787 +       return 0;
1788 +}
1789 +
1790 +
1791 +static int tpmfront_remove(struct xenbus_device *dev)
1792 +{
1793 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1794 +       destroy_tpmring(tp);
1795 +       cleanup_vtpm(&dev->dev);
1796 +       return 0;
1797 +}
1798 +
1799 +static int tpmfront_suspend(struct xenbus_device *dev)
1800 +{
1801 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1802 +       u32 ctr;
1803 +
1804 +       /* Take the lock, preventing any application from sending. */
1805 +       mutex_lock(&suspend_lock);
1806 +       tp->is_suspended = 1;
1807 +
1808 +       for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) {
1809 +               if ((ctr % 10) == 0)
1810 +                       printk("TPM-FE [INFO]: Waiting for outstanding "
1811 +                              "request.\n");
1812 +               /* Wait for a request to be responded to. */
1813 +               interruptible_sleep_on_timeout(&tp->wait_q, 100);
1814 +       }
1815 +
1816 +       return 0;
1817 +}
1818 +
1819 +static int tpmfront_suspend_finish(struct tpm_private *tp)
1820 +{
1821 +       tp->is_suspended = 0;
1822 +       /* Allow applications to send again. */
1823 +       mutex_unlock(&suspend_lock);
1824 +       return 0;
1825 +}
1826 +
1827 +static int tpmfront_suspend_cancel(struct xenbus_device *dev)
1828 +{
1829 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1830 +       return tpmfront_suspend_finish(tp);
1831 +}
1832 +
1833 +static int tpmfront_resume(struct xenbus_device *dev)
1834 +{
1835 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1836 +       destroy_tpmring(tp);
1837 +       return talk_to_backend(dev, tp);
1838 +}
1839 +
1840 +static int tpmif_connect(struct xenbus_device *dev,
1841 +                         struct tpm_private *tp,
1842 +                         domid_t domid)
1843 +{
1844 +       int err;
1845 +
1846 +       tp->backend_id = domid;
1847 +
1848 +       err = bind_listening_port_to_irqhandler(
1849 +               domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp);
1850 +       if (err <= 0) {
1851 +               WPRINTK("bind_listening_port_to_irqhandler failed "
1852 +                       "(err=%d)\n", err);
1853 +               return err;
1854 +       }
1855 +       tp->irq = err;
1856 +
1857 +       return 0;
1858 +}
1859 +
1860 +static struct xenbus_device_id tpmfront_ids[] = {
1861 +       { "vtpm" },
1862 +       { "" }
1863 +};
1864 +
1865 +static struct xenbus_driver tpmfront = {
1866 +       .name = "vtpm",
1867 +       .owner = THIS_MODULE,
1868 +       .ids = tpmfront_ids,
1869 +       .probe = tpmfront_probe,
1870 +       .remove =  tpmfront_remove,
1871 +       .resume = tpmfront_resume,
1872 +       .otherend_changed = backend_changed,
1873 +       .suspend = tpmfront_suspend,
1874 +       .suspend_cancel = tpmfront_suspend_cancel,
1875 +};
1876 +
1877 +static void __init init_tpm_xenbus(void)
1878 +{
1879 +       xenbus_register_frontend(&tpmfront);
1880 +}
1881 +
1882 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp)
1883 +{
1884 +       unsigned int i;
1885 +
1886 +       for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
1887 +               tp->tx_buffers[i] = tx_buffer_alloc();
1888 +               if (!tp->tx_buffers[i]) {
1889 +                       tpmif_free_tx_buffers(tp);
1890 +                       return -ENOMEM;
1891 +               }
1892 +       }
1893 +       return 0;
1894 +}
1895 +
1896 +static void tpmif_free_tx_buffers(struct tpm_private *tp)
1897 +{
1898 +       unsigned int i;
1899 +
1900 +       for (i = 0; i < TPMIF_TX_RING_SIZE; i++)
1901 +               tx_buffer_free(tp->tx_buffers[i]);
1902 +}
1903 +
1904 +static void tpmif_rx_action(unsigned long priv)
1905 +{
1906 +       struct tpm_private *tp = (struct tpm_private *)priv;
1907 +       int i = 0;
1908 +       unsigned int received;
1909 +       unsigned int offset = 0;
1910 +       u8 *buffer;
1911 +       tpmif_tx_request_t *tx = &tp->tx->ring[i].req;
1912 +
1913 +       atomic_set(&tp->tx_busy, 0);
1914 +       wake_up_interruptible(&tp->wait_q);
1915 +
1916 +       received = tx->size;
1917 +
1918 +       buffer = kmalloc(received, GFP_ATOMIC);
1919 +       if (!buffer)
1920 +               return;
1921 +
1922 +       for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) {
1923 +               struct tx_buffer *txb = tp->tx_buffers[i];
1924 +               tpmif_tx_request_t *tx;
1925 +               unsigned int tocopy;
1926 +
1927 +               tx = &tp->tx->ring[i].req;
1928 +               tocopy = tx->size;
1929 +               if (tocopy > PAGE_SIZE)
1930 +                       tocopy = PAGE_SIZE;
1931 +
1932 +               memcpy(&buffer[offset], txb->data, tocopy);
1933 +
1934 +               gnttab_release_grant_reference(&gref_head, tx->ref);
1935 +
1936 +               offset += tocopy;
1937 +       }
1938 +
1939 +       vtpm_vd_recv(tp->chip, buffer, received, tp->tx_remember);
1940 +       kfree(buffer);
1941 +}
1942 +
1943 +
1944 +static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
1945 +{
1946 +       struct tpm_private *tp = tpm_priv;
1947 +       unsigned long flags;
1948 +
1949 +       spin_lock_irqsave(&tp->tx_lock, flags);
1950 +       tpmif_rx_tasklet.data = (unsigned long)tp;
1951 +       tasklet_schedule(&tpmif_rx_tasklet);
1952 +       spin_unlock_irqrestore(&tp->tx_lock, flags);
1953 +
1954 +       return IRQ_HANDLED;
1955 +}
1956 +
1957 +
1958 +static int tpm_xmit(struct tpm_private *tp,
1959 +                    const u8 * buf, size_t count, int isuserbuffer,
1960 +                    void *remember)
1961 +{
1962 +       tpmif_tx_request_t *tx;
1963 +       TPMIF_RING_IDX i;
1964 +       unsigned int offset = 0;
1965 +
1966 +       spin_lock_irq(&tp->tx_lock);
1967 +
1968 +       if (unlikely(atomic_read(&tp->tx_busy))) {
1969 +               printk("tpm_xmit: There's an outstanding request/response "
1970 +                      "on the way!\n");
1971 +               spin_unlock_irq(&tp->tx_lock);
1972 +               return -EBUSY;
1973 +       }
1974 +
1975 +       if (tp->is_connected != 1) {
1976 +               spin_unlock_irq(&tp->tx_lock);
1977 +               return -EIO;
1978 +       }
1979 +
1980 +       for (i = 0; count > 0 && i < TPMIF_TX_RING_SIZE; i++) {
1981 +               struct tx_buffer *txb = tp->tx_buffers[i];
1982 +               int copied;
1983 +
1984 +               if (!txb) {
1985 +                       DPRINTK("txb (i=%d) is NULL. buffers initilized?\n"
1986 +                               "Not transmitting anything!\n", i);
1987 +                       spin_unlock_irq(&tp->tx_lock);
1988 +                       return -EFAULT;
1989 +               }
1990 +
1991 +               copied = tx_buffer_copy(txb, &buf[offset], count,
1992 +                                       isuserbuffer);
1993 +               if (copied < 0) {
1994 +                       /* An error occurred */
1995 +                       spin_unlock_irq(&tp->tx_lock);
1996 +                       return copied;
1997 +               }
1998 +               count -= copied;
1999 +               offset += copied;
2000 +
2001 +               tx = &tp->tx->ring[i].req;
2002 +               tx->addr = virt_to_machine(txb->data);
2003 +               tx->size = txb->len;
2004 +               tx->unused = 0;
2005 +
2006 +               DPRINTK("First 4 characters sent by TPM-FE are "
2007 +                       "0x%02x 0x%02x 0x%02x 0x%02x\n",
2008 +                       txb->data[0],txb->data[1],txb->data[2],txb->data[3]);
2009 +
2010 +               /* Get the granttable reference for this page. */
2011 +               tx->ref = gnttab_claim_grant_reference(&gref_head);
2012 +               if (tx->ref == -ENOSPC) {
2013 +                       spin_unlock_irq(&tp->tx_lock);
2014 +                       DPRINTK("Grant table claim reference failed in "
2015 +                               "func:%s line:%d file:%s\n",
2016 +                               __FUNCTION__, __LINE__, __FILE__);
2017 +                       return -ENOSPC;
2018 +               }
2019 +               gnttab_grant_foreign_access_ref(tx->ref,
2020 +                                               tp->backend_id,
2021 +                                               virt_to_mfn(txb->data),
2022 +                                               0 /*RW*/);
2023 +               wmb();
2024 +       }
2025 +
2026 +       atomic_set(&tp->tx_busy, 1);
2027 +       tp->tx_remember = remember;
2028 +
2029 +       mb();
2030 +
2031 +       notify_remote_via_irq(tp->irq);
2032 +
2033 +       spin_unlock_irq(&tp->tx_lock);
2034 +       return offset;
2035 +}
2036 +
2037 +
2038 +static void tpmif_notify_upperlayer(struct tpm_private *tp)
2039 +{
2040 +       /* Notify upper layer about the state of the connection to the BE. */
2041 +       vtpm_vd_status(tp->chip, (tp->is_connected
2042 +                                 ? TPM_VD_STATUS_CONNECTED
2043 +                                 : TPM_VD_STATUS_DISCONNECTED));
2044 +}
2045 +
2046 +
2047 +static void tpmif_set_connected_state(struct tpm_private *tp, u8 is_connected)
2048 +{
2049 +       /*
2050 +        * Don't notify upper layer if we are in suspend mode and
2051 +        * should disconnect - the assumption is that we will resume.
2052 +        * The mutex keeps apps from sending.
2053 +        */
2054 +       if (is_connected == 0 && tp->is_suspended == 1)
2055 +               return;
2056 +
2057 +       /*
2058 +        * Unlock the mutex if we are connected again
2059 +        * after being suspended - now resuming.
2060 +        * This also removes the suspend state.
2061 +        */
2062 +       if (is_connected == 1 && tp->is_suspended == 1)
2063 +               tpmfront_suspend_finish(tp);
2064 +
2065 +       if (is_connected != tp->is_connected) {
2066 +               tp->is_connected = is_connected;
2067 +               tpmif_notify_upperlayer(tp);
2068 +       }
2069 +}
2070 +
2071 +
2072 +
2073 +/* =================================================================
2074 + * Initialization function.
2075 + * =================================================================
2076 + */
2077 +
2078 +
2079 +static int __init tpmif_init(void)
2080 +{
2081 +       struct tpm_private *tp;
2082 +
2083 +       if (is_initial_xendomain())
2084 +               return -EPERM;
2085 +
2086 +       tp = tpm_private_get();
2087 +       if (!tp)
2088 +               return -ENOMEM;
2089 +
2090 +       IPRINTK("Initialising the vTPM driver.\n");
2091 +       if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE,
2092 +                                         &gref_head) < 0) {
2093 +               tpm_private_put();
2094 +               return -EFAULT;
2095 +       }
2096 +
2097 +       init_tpm_xenbus();
2098 +       return 0;
2099 +}
2100 +
2101 +
2102 +module_init(tpmif_init);
2103 +
2104 +MODULE_LICENSE("Dual BSD/GPL");
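
(Illustrative sketch, not part of the diff: how a caller in the vtpm glue layer
might hand a command to the transport above. The wrapper name and the cookie are
hypothetical; only the vtpm_vd_send() signature and the return convention visible
in tpm_xmit() are taken from the patch.)

	/* Sketch only: queue one TPM command through the shared ring.
	 * vtpm_vd_send() returns the number of bytes handed to the tx ring
	 * or a negative errno; the response arrives asynchronously via
	 * vtpm_vd_recv() from tpmif_rx_action(). */
	static int vtpm_queue_command(struct tpm_private *tp,
	                              const u8 *cmd, size_t len, void *cookie)
	{
		int rc = vtpm_vd_send(tp, cmd, len, cookie);

		if (rc < 0)
			return rc;		/* -EBUSY, -EIO, -ENOSPC, ... */
		if ((size_t)rc != len)
			return -EIO;		/* command did not fit in the ring */
		return 0;
	}
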
2105 --- head-2011-02-08.orig/drivers/edac/edac_mc.c 2011-01-31 12:42:05.000000000 +0100
2106 +++ head-2011-02-08/drivers/edac/edac_mc.c      2011-01-31 14:53:38.000000000 +0100
2107 @@ -615,6 +615,10 @@ static void edac_mc_scrub_block(unsigned
2108  
2109         debugf3("%s()\n", __func__);
2110  
2111 +#ifdef CONFIG_XEN
2112 +       page = mfn_to_local_pfn(page);
2113 +#endif
2114 +
2115         /* ECC error page was not in our memory. Ignore it. */
2116         if (!pfn_valid(page))
2117                 return;
2118 --- head-2011-02-08.orig/drivers/firmware/dell_rbu.c    2010-08-02 00:11:14.000000000 +0200
2119 +++ head-2011-02-08/drivers/firmware/dell_rbu.c 2011-01-31 14:53:38.000000000 +0100
2120 @@ -170,9 +170,28 @@ static int create_packet(void *data, siz
2121                         spin_lock(&rbu_data.lock);
2122                         goto out_alloc_packet_array;
2123                 }
2124 +#ifdef CONFIG_XEN
2125 +               if (ordernum && xen_create_contiguous_region(
2126 +                       (unsigned long)packet_data_temp_buf, ordernum, 0)) {
2127 +                       free_pages((unsigned long)packet_data_temp_buf,
2128 +                                  ordernum);
2129 +                       printk(KERN_WARNING
2130 +                               "dell_rbu:%s: failed to adjust new "
2131 +                               "packet\n", __func__);
2132 +                       retval = -ENOMEM;
2133 +                       spin_lock(&rbu_data.lock);
2134 +                       goto out_alloc_packet_array;
2135 +               }
2136 +#endif
2137  
2138 -               if ((unsigned long)virt_to_phys(packet_data_temp_buf)
2139 +               if ((unsigned long)virt_to_bus(packet_data_temp_buf)
2140                                 < allocation_floor) {
2141 +#ifdef CONFIG_XEN
2142 +                       if (ordernum)
2143 +                               xen_destroy_contiguous_region(
2144 +                                       (unsigned long)packet_data_temp_buf,
2145 +                                       ordernum);
2146 +#endif
2147                         pr_debug("packet 0x%lx below floor at 0x%lx.\n",
2148                                         (unsigned long)virt_to_phys(
2149                                                 packet_data_temp_buf),
2150 @@ -186,7 +205,7 @@ static int create_packet(void *data, siz
2151         newpacket->data = packet_data_temp_buf;
2152  
2153         pr_debug("create_packet: newpacket at physical addr %lx\n",
2154 -               (unsigned long)virt_to_phys(newpacket->data));
2155 +               (unsigned long)virt_to_bus(newpacket->data));
2156  
2157         /* packets may not have fixed size */
2158         newpacket->length = length;
2159 @@ -205,7 +224,7 @@ out_alloc_packet_array:
2160         /* always free packet array */
2161         for (;idx>0;idx--) {
2162                 pr_debug("freeing unused packet below floor 0x%lx.\n",
2163 -                       (unsigned long)virt_to_phys(
2164 +                       (unsigned long)virt_to_bus(
2165                                 invalid_addr_packet_array[idx-1]));
2166                 free_pages((unsigned long)invalid_addr_packet_array[idx-1],
2167                         ordernum);
2168 @@ -349,6 +368,13 @@ static void packet_empty_list(void)
2169                  * to make sure there are no stale RBU packets left in memory
2170                  */
2171                 memset(newpacket->data, 0, rbu_data.packetsize);
2172 +#ifdef CONFIG_XEN
2173 +               if (newpacket->ordernum)
2174 +                       xen_destroy_contiguous_region(
2175 +                               (unsigned long)newpacket->data,
2176 +                               newpacket->ordernum);
2177 +#endif
2178 +
2179                 free_pages((unsigned long) newpacket->data,
2180                         newpacket->ordernum);
2181                 kfree(newpacket);
2182 @@ -403,7 +429,9 @@ static int img_update_realloc(unsigned l
2183  {
2184         unsigned char *image_update_buffer = NULL;
2185         unsigned long rc;
2186 +#ifndef CONFIG_XEN
2187         unsigned long img_buf_phys_addr;
2188 +#endif
2189         int ordernum;
2190         int dma_alloc = 0;
2191  
2192 @@ -434,15 +462,19 @@ static int img_update_realloc(unsigned l
2193  
2194         spin_unlock(&rbu_data.lock);
2195  
2196 +#ifndef CONFIG_XEN
2197         ordernum = get_order(size);
2198         image_update_buffer =
2199                 (unsigned char *) __get_free_pages(GFP_KERNEL, ordernum);
2200  
2201         img_buf_phys_addr =
2202 -               (unsigned long) virt_to_phys(image_update_buffer);
2203 +               (unsigned long) virt_to_bus(image_update_buffer);
2204  
2205         if (img_buf_phys_addr > BIOS_SCAN_LIMIT) {
2206                 free_pages((unsigned long) image_update_buffer, ordernum);
2207 +#else
2208 +       {
2209 +#endif
2210                 ordernum = -1;
2211                 image_update_buffer = dma_alloc_coherent(NULL, size,
2212                         &dell_rbu_dmaaddr, GFP_KERNEL);
2213 @@ -695,6 +727,12 @@ static struct bin_attribute rbu_packet_s
2214  static int __init dcdrbu_init(void)
2215  {
2216         int rc;
2217 +
2218 +#ifdef CONFIG_XEN
2219 +       if (!is_initial_xendomain())
2220 +               return -ENODEV;
2221 +#endif
2222 +
2223         spin_lock_init(&rbu_data.lock);
2224  
2225         init_packet_head();
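
(Illustrative sketch, not part of the diff: the allocation pattern the dell_rbu
hunks above follow under Xen. The helper name is invented; the calls shown are the
ones used in the hunks: __get_free_pages(), xen_create_contiguous_region() and
free_pages().)

	/* Sketch only: under Xen, pseudo-physically contiguous pages are not
	 * necessarily machine-contiguous, so a multi-page buffer whose bus
	 * address matters is exchanged for a machine-contiguous extent before
	 * virt_to_bus() is compared against the BIOS scan limit. */
	static void *rbu_alloc_contiguous(int order)
	{
		unsigned long addr = __get_free_pages(GFP_KERNEL, order);

		if (!addr)
			return NULL;
		if (order && xen_create_contiguous_region(addr, order, 0)) {
			free_pages(addr, order);
			return NULL;	/* could not make it machine-contiguous */
		}
		return (void *)addr;
	}
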
2226 --- head-2011-02-08.orig/drivers/ide/ide-lib.c  2009-09-10 00:13:59.000000000 +0200
2227 +++ head-2011-02-08/drivers/ide/ide-lib.c       2011-01-31 14:53:38.000000000 +0100
2228 @@ -18,12 +18,12 @@ void ide_toggle_bounce(ide_drive_t *driv
2229  {
2230         u64 addr = BLK_BOUNCE_HIGH;     /* dma64_addr_t */
2231  
2232 -       if (!PCI_DMA_BUS_IS_PHYS) {
2233 -               addr = BLK_BOUNCE_ANY;
2234 -       } else if (on && drive->media == ide_disk) {
2235 +       if (on && drive->media == ide_disk) {
2236                 struct device *dev = drive->hwif->dev;
2237  
2238 -               if (dev && dev->dma_mask)
2239 +               if (!PCI_DMA_BUS_IS_PHYS)
2240 +                       addr = BLK_BOUNCE_ANY;
2241 +               else if (dev && dev->dma_mask)
2242                         addr = *dev->dma_mask;
2243         }
2244  
2245 --- head-2011-02-08.orig/drivers/oprofile/buffer_sync.c 2011-01-05 01:50:19.000000000 +0100
2246 +++ head-2011-02-08/drivers/oprofile/buffer_sync.c      2011-01-31 14:53:38.000000000 +0100
2247 @@ -8,6 +8,10 @@
2248   * @author Barry Kasindorf
2249   * @author Robert Richter <robert.richter@amd.com>
2250   *
2251 + * Modified by Aravind Menon for Xen
2252 + * These modifications are:
2253 + * Copyright (C) 2005 Hewlett-Packard Co.
2254 + *
2255   * This is the core of the buffer management. Each
2256   * CPU buffer is processed and entered into the
2257   * global event buffer. Such processing is necessary
2258 @@ -43,6 +47,8 @@ static cpumask_var_t marked_cpus;
2259  static DEFINE_SPINLOCK(task_mortuary);
2260  static void process_task_mortuary(void);
2261  
2262 +static int cpu_current_domain[NR_CPUS];
2263 +
2264  /* Take ownership of the task struct and place it on the
2265   * list for processing. Only after two full buffer syncs
2266   * does the task eventually get freed, because by then
2267 @@ -61,7 +67,6 @@ task_free_notify(struct notifier_block *
2268         return NOTIFY_OK;
2269  }
2270  
2271 -
2272  /* The task is on its way out. A sync of the buffer means we can catch
2273   * any remaining samples for this task.
2274   */
2275 @@ -144,6 +149,11 @@ static struct notifier_block module_load
2276  int sync_start(void)
2277  {
2278         int err;
2279 +       int i;
2280 +
2281 +       for (i = 0; i < NR_CPUS; i++) {
2282 +               cpu_current_domain[i] = COORDINATOR_DOMAIN;
2283 +       }
2284  
2285         if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
2286                 return -ENOMEM;
2287 @@ -286,13 +296,29 @@ static void add_cpu_switch(int i)
2288         last_cookie = INVALID_COOKIE;
2289  }
2290  
2291 -static void add_kernel_ctx_switch(unsigned int in_kernel)
2292 +static void add_cpu_mode_switch(unsigned int cpu_mode)
2293  {
2294         add_event_entry(ESCAPE_CODE);
2295 -       if (in_kernel)
2296 +       switch (cpu_mode) {
2297 +       case CPU_MODE_USER:
2298 +               add_event_entry(USER_ENTER_SWITCH_CODE);
2299 +               break;
2300 +       case CPU_MODE_KERNEL:
2301                 add_event_entry(KERNEL_ENTER_SWITCH_CODE);
2302 -       else
2303 -               add_event_entry(KERNEL_EXIT_SWITCH_CODE);
2304 +               break;
2305 +       case CPU_MODE_XEN:
2306 +               add_event_entry(XEN_ENTER_SWITCH_CODE);
2307 +               break;
2308 +       default:
2309 +               break;
2310 +       }
2311 +}
2312 +
2313 +static void add_domain_switch(unsigned long domain_id)
2314 +{
2315 +       add_event_entry(ESCAPE_CODE);
2316 +       add_event_entry(DOMAIN_SWITCH_CODE);
2317 +       add_event_entry(domain_id);
2318  }
2319  
2320  static void
2321 @@ -373,12 +399,12 @@ static inline void add_sample_entry(unsi
2322   * for later lookup from userspace. Return 0 on failure.
2323   */
2324  static int
2325 -add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
2326 +add_sample(struct mm_struct *mm, struct op_sample *s, int cpu_mode)
2327  {
2328         unsigned long cookie;
2329         off_t offset;
2330  
2331 -       if (in_kernel) {
2332 +       if (cpu_mode >= CPU_MODE_KERNEL) {
2333                 add_sample_entry(s->eip, s->event);
2334                 return 1;
2335         }
2336 @@ -503,9 +529,10 @@ void sync_buffer(int cpu)
2337         unsigned long val;
2338         struct task_struct *new;
2339         unsigned long cookie = 0;
2340 -       int in_kernel = 1;
2341 +       int cpu_mode = CPU_MODE_KERNEL;
2342         sync_buffer_state state = sb_buffer_start;
2343         unsigned int i;
2344 +       int domain_switch = 0;
2345         unsigned long available;
2346         unsigned long flags;
2347         struct op_entry entry;
2348 @@ -515,6 +542,11 @@ void sync_buffer(int cpu)
2349  
2350         add_cpu_switch(cpu);
2351  
2352 +       /* We need to assign the first samples in this CPU buffer to the
2353 +          same domain that we were processing at the last sync_buffer */
2354 +       if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN)
2355 +               add_domain_switch(cpu_current_domain[cpu]);
2356 +
2357         op_cpu_buffer_reset(cpu);
2358         available = op_cpu_buffer_entries(cpu);
2359  
2360 @@ -523,6 +555,13 @@ void sync_buffer(int cpu)
2361                 if (!sample)
2362                         break;
2363  
2364 +               if (domain_switch) {
2365 +                       cpu_current_domain[cpu] = sample->eip;
2366 +                       add_domain_switch(sample->eip);
2367 +                       domain_switch = 0;
2368 +                       continue;
2369 +               }
2370 +
2371                 if (is_code(sample->eip)) {
2372                         flags = sample->event;
2373                         if (flags & TRACE_BEGIN) {
2374 @@ -531,10 +570,10 @@ void sync_buffer(int cpu)
2375                         }
2376                         if (flags & KERNEL_CTX_SWITCH) {
2377                                 /* kernel/userspace switch */
2378 -                               in_kernel = flags & IS_KERNEL;
2379 +                               cpu_mode = flags & CPU_MODE_MASK;
2380                                 if (state == sb_buffer_start)
2381                                         state = sb_sample_start;
2382 -                               add_kernel_ctx_switch(flags & IS_KERNEL);
2383 +                               add_cpu_mode_switch(cpu_mode);
2384                         }
2385                         if (flags & USER_CTX_SWITCH
2386                             && op_cpu_buffer_get_data(&entry, &val)) {
2387 @@ -547,16 +586,23 @@ void sync_buffer(int cpu)
2388                                         cookie = get_exec_dcookie(mm);
2389                                 add_user_ctx_switch(new, cookie);
2390                         }
2391 +                       if (flags & DOMAIN_SWITCH)
2392 +                               domain_switch = 1;
2393                         if (op_cpu_buffer_get_size(&entry))
2394                                 add_data(&entry, mm);
2395                         continue;
2396                 }
2397  
2398 +               if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
2399 +                       add_sample_entry(sample->eip, sample->event);
2400 +                       continue;
2401 +               }
2402 +
2403                 if (state < sb_bt_start)
2404                         /* ignore sample */
2405                         continue;
2406  
2407 -               if (add_sample(mm, sample, in_kernel))
2408 +               if (add_sample(mm, sample, cpu_mode))
2409                         continue;
2410  
2411                 /* ignore backtraces if failed to add a sample */
2412 @@ -567,6 +613,10 @@ void sync_buffer(int cpu)
2413         }
2414         release_mm(mm);
2415  
2416 +       /* We reset domain to COORDINATOR at each CPU switch */
2417 +       if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN)
2418 +               add_domain_switch(COORDINATOR_DOMAIN);
2419 +
2420         mark_done(cpu);
2421  
2422         mutex_unlock(&buffer_mutex);
2423 --- head-2011-02-08.orig/drivers/oprofile/cpu_buffer.c  2011-01-05 01:50:19.000000000 +0100
2424 +++ head-2011-02-08/drivers/oprofile/cpu_buffer.c       2011-01-31 14:53:38.000000000 +0100
2425 @@ -8,6 +8,10 @@
2426   * @author Barry Kasindorf <barry.kasindorf@amd.com>
2427   * @author Robert Richter <robert.richter@amd.com>
2428   *
2429 + * Modified by Aravind Menon for Xen
2430 + * These modifications are:
2431 + * Copyright (C) 2005 Hewlett-Packard Co.
2432 + *
2433   * Each CPU has a local buffer that stores PC value/event
2434   * pairs. We also log context switches when we notice them.
2435   * Eventually each CPU's buffer is processed into the global
2436 @@ -38,6 +42,8 @@ static void wq_sync_buffer(struct work_s
2437  #define DEFAULT_TIMER_EXPIRE (HZ / 10)
2438  static int work_enabled;
2439  
2440 +static int32_t current_domain = COORDINATOR_DOMAIN;
2441 +
2442  unsigned long oprofile_get_cpu_buffer_size(void)
2443  {
2444         return oprofile_cpu_buffer_size;
2445 @@ -75,7 +81,7 @@ int alloc_cpu_buffers(void)
2446                 struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
2447  
2448                 b->last_task = NULL;
2449 -               b->last_is_kernel = -1;
2450 +               b->last_cpu_mode = -1;
2451                 b->tracing = 0;
2452                 b->buffer_size = buffer_size;
2453                 b->sample_received = 0;
2454 @@ -180,7 +186,7 @@ unsigned long op_cpu_buffer_entries(int 
2455  
2456  static int
2457  op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
2458 -           int is_kernel, struct task_struct *task)
2459 +           int cpu_mode, struct task_struct *task)
2460  {
2461         struct op_entry entry;
2462         struct op_sample *sample;
2463 @@ -193,16 +199,15 @@ op_add_code(struct oprofile_cpu_buffer *
2464                 flags |= TRACE_BEGIN;
2465  
2466         /* notice a switch from user->kernel or vice versa */
2467 -       is_kernel = !!is_kernel;
2468 -       if (cpu_buf->last_is_kernel != is_kernel) {
2469 -               cpu_buf->last_is_kernel = is_kernel;
2470 -               flags |= KERNEL_CTX_SWITCH;
2471 -               if (is_kernel)
2472 -                       flags |= IS_KERNEL;
2473 +       if (cpu_buf->last_cpu_mode != cpu_mode) {
2474 +               cpu_buf->last_cpu_mode = cpu_mode;
2475 +               flags |= KERNEL_CTX_SWITCH | cpu_mode;
2476         }
2477  
2478         /* notice a task switch */
2479 -       if (cpu_buf->last_task != task) {
2480 +       /* if not processing other domain samples */
2481 +       if (cpu_buf->last_task != task &&
2482 +           current_domain == COORDINATOR_DOMAIN) {
2483                 cpu_buf->last_task = task;
2484                 flags |= USER_CTX_SWITCH;
2485         }
2486 @@ -251,14 +256,14 @@ op_add_sample(struct oprofile_cpu_buffer
2487  /*
2488   * This must be safe from any context.
2489   *
2490 - * is_kernel is needed because on some architectures you cannot
2491 + * cpu_mode is needed because on some architectures you cannot
2492   * tell if you are in kernel or user space simply by looking at
2493 - * pc. We tag this in the buffer by generating kernel enter/exit
2494 - * events whenever is_kernel changes
2495 + * pc. We tag this in the buffer by generating kernel/user (and
2496 + * xen) enter events whenever cpu_mode changes
2497   */
2498  static int
2499  log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
2500 -          unsigned long backtrace, int is_kernel, unsigned long event)
2501 +          unsigned long backtrace, int cpu_mode, unsigned long event)
2502  {
2503         cpu_buf->sample_received++;
2504  
2505 @@ -267,7 +272,7 @@ log_sample(struct oprofile_cpu_buffer *c
2506                 return 0;
2507         }
2508  
2509 -       if (op_add_code(cpu_buf, backtrace, is_kernel, current))
2510 +       if (op_add_code(cpu_buf, backtrace, cpu_mode, current))
2511                 goto fail;
2512  
2513         if (op_add_sample(cpu_buf, pc, event))
2514 @@ -430,6 +435,25 @@ fail:
2515         return;
2516  }
2517  
2518 +int oprofile_add_domain_switch(int32_t domain_id)
2519 +{
2520 +       struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
2521 +
2522 +       /* should have space for switching into and out of domain
2523 +          (2 slots each) plus one sample and one cpu mode switch */
2524 +       if (((nr_available_slots(cpu_buf) < 6) &&
2525 +            (domain_id != COORDINATOR_DOMAIN)) ||
2526 +           (nr_available_slots(cpu_buf) < 2))
2527 +               return 0;
2528 +
2529 +       add_code(cpu_buf, DOMAIN_SWITCH);
2530 +       add_sample(cpu_buf, domain_id, 0);
2531 +
2532 +       current_domain = domain_id;
2533 +
2534 +       return 1;
2535 +}
2536 +
2537  /*
2538   * This serves to avoid cpu buffer overflow, and makes sure
2539   * the task mortuary progresses
2540 --- head-2011-02-08.orig/drivers/oprofile/cpu_buffer.h  2011-01-05 01:50:19.000000000 +0100
2541 +++ head-2011-02-08/drivers/oprofile/cpu_buffer.h       2011-01-31 14:53:38.000000000 +0100
2542 @@ -41,7 +41,7 @@ struct op_entry;
2543  struct oprofile_cpu_buffer {
2544         unsigned long buffer_size;
2545         struct task_struct *last_task;
2546 -       int last_is_kernel;
2547 +       int last_cpu_mode;
2548         int tracing;
2549         unsigned long sample_received;
2550         unsigned long sample_lost_overflow;
2551 @@ -63,7 +63,7 @@ static inline void op_cpu_buffer_reset(i
2552  {
2553         struct oprofile_cpu_buffer *cpu_buf = &per_cpu(op_cpu_buffer, cpu);
2554  
2555 -       cpu_buf->last_is_kernel = -1;
2556 +       cpu_buf->last_cpu_mode = -1;
2557         cpu_buf->last_task = NULL;
2558  }
2559  
2560 @@ -113,9 +113,13 @@ int op_cpu_buffer_get_data(struct op_ent
2561  }
2562  
2563  /* extra data flags */
2564 -#define KERNEL_CTX_SWITCH      (1UL << 0)
2565 -#define IS_KERNEL              (1UL << 1)
2566 +#define CPU_MODE_USER          0
2567 +#define CPU_MODE_KERNEL                1
2568 +#define CPU_MODE_XEN           2
2569 +#define CPU_MODE_MASK          3
2570  #define TRACE_BEGIN            (1UL << 2)
2571  #define USER_CTX_SWITCH                (1UL << 3)
2572 +#define KERNEL_CTX_SWITCH      (1UL << 4)
2573 +#define DOMAIN_SWITCH          (1UL << 5)
2574  
2575  #endif /* OPROFILE_CPU_BUFFER_H */
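
(A minimal sketch, not part of the diff, of the flag layout introduced above; it
mirrors how op_add_code() encodes a mode switch and how sync_buffer() decodes it.
The helper names are invented for illustration.)

	/* Sketch only: the CPU mode now occupies the low two bits of the escape
	 * flags; the context-switch markers moved up to bits 4/5 to make room
	 * for it and for DOMAIN_SWITCH. */
	static unsigned long encode_mode_switch(int cpu_mode)
	{
		return KERNEL_CTX_SWITCH | cpu_mode;	/* e.g. (1UL << 4) | CPU_MODE_XEN */
	}

	static int decode_cpu_mode(unsigned long flags)
	{
		return flags & CPU_MODE_MASK;		/* CPU_MODE_USER/KERNEL/XEN */
	}
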
2576 --- head-2011-02-08.orig/drivers/oprofile/event_buffer.h        2008-12-25 00:26:37.000000000 +0100
2577 +++ head-2011-02-08/drivers/oprofile/event_buffer.h     2011-01-31 14:53:38.000000000 +0100
2578 @@ -30,6 +30,9 @@ void wake_up_buffer_waiter(void);
2579  #define INVALID_COOKIE ~0UL
2580  #define NO_COOKIE 0UL
2581  
2582 +/* Constant used to refer to coordinator domain (Xen) */
2583 +#define COORDINATOR_DOMAIN -1
2584 +
2585  extern const struct file_operations event_buffer_fops;
2586  
2587  /* mutex between sync_cpu_buffers() and the
2588 --- head-2011-02-08.orig/drivers/oprofile/oprof.c       2011-01-05 01:50:19.000000000 +0100
2589 +++ head-2011-02-08/drivers/oprofile/oprof.c    2011-01-31 14:53:38.000000000 +0100
2590 @@ -5,6 +5,10 @@
2591   * @remark Read the file COPYING
2592   *
2593   * @author John Levon <levon@movementarian.org>
2594 + *
2595 + * Modified by Aravind Menon for Xen
2596 + * These modifications are:
2597 + * Copyright (C) 2005 Hewlett-Packard Co.
2598   */
2599  
2600  #include <linux/kernel.h>
2601 @@ -35,6 +39,32 @@ static DEFINE_MUTEX(start_mutex);
2602   */
2603  static int timer = 0;
2604  
2605 +int oprofile_set_active(int active_domains[], unsigned int adomains)
2606 +{
2607 +       int err;
2608 +
2609 +       if (!oprofile_ops.set_active)
2610 +               return -EINVAL;
2611 +
2612 +       mutex_lock(&start_mutex);
2613 +       err = oprofile_ops.set_active(active_domains, adomains);
2614 +       mutex_unlock(&start_mutex);
2615 +       return err;
2616 +}
2617 +
2618 +int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
2619 +{
2620 +       int err;
2621 +
2622 +       if (!oprofile_ops.set_passive)
2623 +               return -EINVAL;
2624 +
2625 +       mutex_lock(&start_mutex);
2626 +       err = oprofile_ops.set_passive(passive_domains, pdomains);
2627 +       mutex_unlock(&start_mutex);
2628 +       return err;
2629 +}
2630 +
2631  int oprofile_setup(void)
2632  {
2633         int err;
2634 --- head-2011-02-08.orig/drivers/oprofile/oprof.h       2011-01-05 01:50:19.000000000 +0100
2635 +++ head-2011-02-08/drivers/oprofile/oprof.h    2011-01-31 14:53:38.000000000 +0100
2636 @@ -40,4 +40,7 @@ void oprofile_timer_exit(void);
2637  int oprofile_set_ulong(unsigned long *addr, unsigned long val);
2638  int oprofile_set_timeout(unsigned long time);
2639  
2640 +int oprofile_set_active(int active_domains[], unsigned int adomains);
2641 +int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
2642 +
2643  #endif /* OPROF_H */
2644 --- head-2011-02-08.orig/drivers/oprofile/oprofile_files.c      2011-01-05 01:50:19.000000000 +0100
2645 +++ head-2011-02-08/drivers/oprofile/oprofile_files.c   2011-01-31 14:53:38.000000000 +0100
2646 @@ -5,11 +5,17 @@
2647   * @remark Read the file COPYING
2648   *
2649   * @author John Levon <levon@movementarian.org>
2650 + *
2651 + * Modified by Aravind Menon for Xen
2652 + * These modifications are:
2653 + * Copyright (C) 2005 Hewlett-Packard Co.
2654   */
2655  
2656  #include <linux/fs.h>
2657  #include <linux/oprofile.h>
2658  #include <linux/jiffies.h>
2659 +#include <asm/uaccess.h>
2660 +#include <linux/ctype.h>
2661  
2662  #include "event_buffer.h"
2663  #include "oprofile_stats.h"
2664 @@ -174,6 +180,195 @@ static const struct file_operations dump
2665         .llseek         = noop_llseek,
2666  };
2667  
2668 +#define TMPBUFSIZE 512
2669 +
2670 +static unsigned int adomains = 0;
2671 +static int active_domains[MAX_OPROF_DOMAINS + 1];
2672 +static DEFINE_MUTEX(adom_mutex);
2673 +
2674 +static ssize_t adomain_write(struct file * file, char const __user * buf,
2675 +                            size_t count, loff_t * offset)
2676 +{
2677 +       char *tmpbuf;
2678 +       char *startp, *endp;
2679 +       int i;
2680 +       unsigned long val;
2681 +       ssize_t retval = count;
2682 +
2683 +       if (*offset)
2684 +               return -EINVAL;
2685 +       if (count > TMPBUFSIZE - 1)
2686 +               return -EINVAL;
2687 +
2688 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2689 +               return -ENOMEM;
2690 +
2691 +       if (copy_from_user(tmpbuf, buf, count)) {
2692 +               kfree(tmpbuf);
2693 +               return -EFAULT;
2694 +       }
2695 +       tmpbuf[count] = 0;
2696 +
2697 +       mutex_lock(&adom_mutex);
2698 +
2699 +       startp = tmpbuf;
2700 +       /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2701 +       for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2702 +               val = simple_strtoul(startp, &endp, 0);
2703 +               if (endp == startp)
2704 +                       break;
2705 +               while (ispunct(*endp) || isspace(*endp))
2706 +                       endp++;
2707 +               active_domains[i] = val;
2708 +               if (active_domains[i] != val)
2709 +                       /* Overflow, force error below */
2710 +                       i = MAX_OPROF_DOMAINS + 1;
2711 +               startp = endp;
2712 +       }
2713 +       /* Force error on trailing junk */
2714 +       adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2715 +
2716 +       kfree(tmpbuf);
2717 +
2718 +       if (adomains > MAX_OPROF_DOMAINS
2719 +           || oprofile_set_active(active_domains, adomains)) {
2720 +               adomains = 0;
2721 +               retval = -EINVAL;
2722 +       }
2723 +
2724 +       mutex_unlock(&adom_mutex);
2725 +       return retval;
2726 +}
2727 +
2728 +static ssize_t adomain_read(struct file * file, char __user * buf,
2729 +                           size_t count, loff_t * offset)
2730 +{
2731 +       char * tmpbuf;
2732 +       size_t len;
2733 +       int i;
2734 +       ssize_t retval;
2735 +
2736 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2737 +               return -ENOMEM;
2738 +
2739 +       mutex_lock(&adom_mutex);
2740 +
2741 +       len = 0;
2742 +       for (i = 0; i < adomains; i++)
2743 +               len += snprintf(tmpbuf + len,
2744 +                               len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2745 +                               "%u ", active_domains[i]);
2746 +       WARN_ON(len > TMPBUFSIZE);
2747 +       if (len != 0 && len <= TMPBUFSIZE)
2748 +               tmpbuf[len-1] = '\n';
2749 +
2750 +       mutex_unlock(&adom_mutex);
2751 +
2752 +       retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2753 +
2754 +       kfree(tmpbuf);
2755 +       return retval;
2756 +}
2757 +
2758 +
2759 +static const struct file_operations active_domain_ops = {
2760 +       .read           = adomain_read,
2761 +       .write          = adomain_write,
2762 +};
2763 +
2764 +static unsigned int pdomains = 0;
2765 +static int passive_domains[MAX_OPROF_DOMAINS];
2766 +static DEFINE_MUTEX(pdom_mutex);
2767 +
2768 +static ssize_t pdomain_write(struct file * file, char const __user * buf,
2769 +                            size_t count, loff_t * offset)
2770 +{
2771 +       char *tmpbuf;
2772 +       char *startp, *endp;
2773 +       int i;
2774 +       unsigned long val;
2775 +       ssize_t retval = count;
2776 +
2777 +       if (*offset)
2778 +               return -EINVAL;
2779 +       if (count > TMPBUFSIZE - 1)
2780 +               return -EINVAL;
2781 +
2782 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2783 +               return -ENOMEM;
2784 +
2785 +       if (copy_from_user(tmpbuf, buf, count)) {
2786 +               kfree(tmpbuf);
2787 +               return -EFAULT;
2788 +       }
2789 +       tmpbuf[count] = 0;
2790 +
2791 +       mutex_lock(&pdom_mutex);
2792 +
2793 +       startp = tmpbuf;
2794 +       /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2795 +       for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2796 +               val = simple_strtoul(startp, &endp, 0);
2797 +               if (endp == startp)
2798 +                       break;
2799 +               while (ispunct(*endp) || isspace(*endp))
2800 +                       endp++;
2801 +               passive_domains[i] = val;
2802 +               if (passive_domains[i] != val)
2803 +                       /* Overflow, force error below */
2804 +                       i = MAX_OPROF_DOMAINS + 1;
2805 +               startp = endp;
2806 +       }
2807 +       /* Force error on trailing junk */
2808 +       pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2809 +
2810 +       kfree(tmpbuf);
2811 +
2812 +       if (pdomains > MAX_OPROF_DOMAINS
2813 +           || oprofile_set_passive(passive_domains, pdomains)) {
2814 +               pdomains = 0;
2815 +               retval = -EINVAL;
2816 +       }
2817 +
2818 +       mutex_unlock(&pdom_mutex);
2819 +       return retval;
2820 +}
2821 +
2822 +static ssize_t pdomain_read(struct file * file, char __user * buf,
2823 +                           size_t count, loff_t * offset)
2824 +{
2825 +       char * tmpbuf;
2826 +       size_t len;
2827 +       int i;
2828 +       ssize_t retval;
2829 +
2830 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2831 +               return -ENOMEM;
2832 +
2833 +       mutex_lock(&pdom_mutex);
2834 +
2835 +       len = 0;
2836 +       for (i = 0; i < pdomains; i++)
2837 +               len += snprintf(tmpbuf + len,
2838 +                               len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2839 +                               "%u ", passive_domains[i]);
2840 +       WARN_ON(len > TMPBUFSIZE);
2841 +       if (len != 0 && len <= TMPBUFSIZE)
2842 +               tmpbuf[len-1] = '\n';
2843 +
2844 +       mutex_unlock(&pdom_mutex);
2845 +
2846 +       retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2847 +
2848 +       kfree(tmpbuf);
2849 +       return retval;
2850 +}
2851 +
2852 +static const struct file_operations passive_domain_ops = {
2853 +       .read           = pdomain_read,
2854 +       .write          = pdomain_write,
2855 +};
2856 +
2857  void oprofile_create_files(struct super_block *sb, struct dentry *root)
2858  {
2859         /* reinitialize default values */
2860 @@ -184,6 +379,8 @@ void oprofile_create_files(struct super_
2861  
2862         oprofilefs_create_file(sb, root, "enable", &enable_fops);
2863         oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
2864 +       oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
2865 +       oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
2866         oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
2867         oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size);
2868         oprofilefs_create_ulong(sb, root, "buffer_watershed", &oprofile_buffer_watershed);
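
(A hedged userspace sketch, not part of the diff, of driving the two new
oprofilefs files. adomain_write() above accepts up to MAX_OPROF_DOMAINS decimal
IDs separated by whitespace or punctuation; the /dev/oprofile mount point and the
example ID list are assumptions.)

	/* Sketch only: tell the Xen oprofile backend which domains' samples to
	 * treat as active; "passive_domains" is written the same way. */
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	static int set_active_domains(const char *ids)	/* e.g. "0 1 3" */
	{
		int fd = open("/dev/oprofile/active_domains", O_WRONLY);
		ssize_t n;

		if (fd < 0)
			return -1;
		n = write(fd, ids, strlen(ids));	/* fails with EINVAL on a bad list */
		close(fd);
		return n < 0 ? -1 : 0;
	}
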
2869 --- head-2011-02-08.orig/fs/aio.c       2011-01-31 12:42:35.000000000 +0100
2870 +++ head-2011-02-08/fs/aio.c    2011-01-31 14:53:38.000000000 +0100
2871 @@ -41,6 +41,11 @@
2872  #include <asm/kmap_types.h>
2873  #include <asm/uaccess.h>
2874  
2875 +#ifdef CONFIG_EPOLL
2876 +#include <linux/poll.h>
2877 +#include <linux/eventpoll.h>
2878 +#endif
2879 +
2880  #if DEBUG > 1
2881  #define dprintk                printk
2882  #else
2883 @@ -989,6 +994,11 @@ put_rq:
2884         if (waitqueue_active(&ctx->wait))
2885                 wake_up(&ctx->wait);
2886  
2887 +#ifdef CONFIG_EPOLL
2888 +       if (ctx->file && waitqueue_active(&ctx->poll_wait))
2889 +               wake_up(&ctx->poll_wait);
2890 +#endif
2891 +
2892         spin_unlock_irqrestore(&ctx->ctx_lock, flags);
2893         return ret;
2894  }
2895 @@ -997,6 +1007,8 @@ EXPORT_SYMBOL(aio_complete);
2896  /* aio_read_evt
2897   *     Pull an event off of the ioctx's event ring.  Returns the number of 
2898   *     events fetched (0 or 1 ;-)
2899 + *     If ent parameter is 0, just returns the number of events that would
2900 + *     be fetched.
2901   *     FIXME: make this use cmpxchg.
2902   *     TODO: make the ringbuffer user mmap()able (requires FIXME).
2903   */
2904 @@ -1019,13 +1031,18 @@ static int aio_read_evt(struct kioctx *i
2905  
2906         head = ring->head % info->nr;
2907         if (head != ring->tail) {
2908 -               struct io_event *evp = aio_ring_event(info, head, KM_USER1);
2909 -               *ent = *evp;
2910 -               head = (head + 1) % info->nr;
2911 -               smp_mb(); /* finish reading the event before updatng the head */
2912 -               ring->head = head;
2913 -               ret = 1;
2914 -               put_aio_ring_event(evp, KM_USER1);
2915 +               if (ent) { /* event requested */
2916 +                       struct io_event *evp =
2917 +                               aio_ring_event(info, head, KM_USER1);
2918 +                       *ent = *evp;
2919 +                       head = (head + 1) % info->nr;
2920 +                       /* finish reading the event before updating the head */
2921 +                       smp_mb();
2922 +                       ring->head = head;
2923 +                       ret = 1;
2924 +                       put_aio_ring_event(evp, KM_USER1);
2925 +               } else /* only need to know availability */
2926 +                       ret = 1;
2927         }
2928         spin_unlock(&info->ring_lock);
2929  
2930 @@ -1210,6 +1227,13 @@ static void io_destroy(struct kioctx *io
2931  
2932         aio_cancel_all(ioctx);
2933         wait_for_all_aios(ioctx);
2934 +#ifdef CONFIG_EPOLL
2935 +       /* forget the poll file, but it's up to the user to close it */
2936 +       if (ioctx->file) {
2937 +               ioctx->file->private_data = 0;
2938 +               ioctx->file = 0;
2939 +       }
2940 +#endif
2941  
2942         /*
2943          * Wake up any waiters.  The setting of ctx->dead must be seen
2944 @@ -1220,6 +1244,67 @@ static void io_destroy(struct kioctx *io
2945         put_ioctx(ioctx);       /* once for the lookup */
2946  }
2947  
2948 +#ifdef CONFIG_EPOLL
2949 +
2950 +static int aio_queue_fd_close(struct inode *inode, struct file *file)
2951 +{
2952 +       struct kioctx *ioctx = file->private_data;
2953 +       if (ioctx) {
2954 +               file->private_data = 0;
2955 +               spin_lock_irq(&ioctx->ctx_lock);
2956 +               ioctx->file = 0;
2957 +               spin_unlock_irq(&ioctx->ctx_lock);
2958 +       }
2959 +       return 0;
2960 +}
2961 +
2962 +static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
2963 +{      unsigned int pollflags = 0;
2964 +       struct kioctx *ioctx = file->private_data;
2965 +
2966 +       if (ioctx) {
2967 +
2968 +               spin_lock_irq(&ioctx->ctx_lock);
2969 +               /* Insert inside our poll wait queue */
2970 +               poll_wait(file, &ioctx->poll_wait, wait);
2971 +
2972 +               /* Check our condition */
2973 +               if (aio_read_evt(ioctx, 0))
2974 +                       pollflags = POLLIN | POLLRDNORM;
2975 +               spin_unlock_irq(&ioctx->ctx_lock);
2976 +       }
2977 +
2978 +       return pollflags;
2979 +}
2980 +
2981 +static const struct file_operations aioq_fops = {
2982 +       .release        = aio_queue_fd_close,
2983 +       .poll           = aio_queue_fd_poll
2984 +};
2985 +
2986 +/* make_aio_fd:
2987 + *  Create a file descriptor that can be used to poll the event queue.
2988 + *  Based and piggybacked on the excellent epoll code.
2989 + */
2990 +
2991 +static int make_aio_fd(struct kioctx *ioctx)
2992 +{
2993 +       int error, fd;
2994 +       struct inode *inode;
2995 +       struct file *file;
2996 +
2997 +       error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
2998 +       if (error)
2999 +               return error;
3000 +
3001 +       /* associate the file with the IO context */
3002 +       file->private_data = ioctx;
3003 +       ioctx->file = file;
3004 +       init_waitqueue_head(&ioctx->poll_wait);
3005 +       return fd;
3006 +}
3007 +#endif
3008 +
3009  /* sys_io_setup:
3010   *     Create an aio_context capable of receiving at least nr_events.
3011   *     ctxp must not point to an aio_context that already exists, and
3012 @@ -1232,18 +1317,30 @@ static void io_destroy(struct kioctx *io
3013   *     resources are available.  May fail with -EFAULT if an invalid
3014   *     pointer is passed for ctxp.  Will fail with -ENOSYS if not
3015   *     implemented.
3016 + *
3017 + *     To request a selectable fd, the user context has to be initialized
3018 + *     to 1, instead of 0, and the return value is the fd.
3019 + *     This keeps the system call compatible, since a non-zero value
3020 + *     was not allowed so far.
3021   */
3022  SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
3023  {
3024         struct kioctx *ioctx = NULL;
3025         unsigned long ctx;
3026         long ret;
3027 +       int make_fd = 0;
3028  
3029         ret = get_user(ctx, ctxp);
3030         if (unlikely(ret))
3031                 goto out;
3032  
3033         ret = -EINVAL;
3034 +#ifdef CONFIG_EPOLL
3035 +       if (ctx == 1) {
3036 +               make_fd = 1;
3037 +               ctx = 0;
3038 +       }
3039 +#endif
3040         if (unlikely(ctx || nr_events == 0)) {
3041                 pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
3042                          ctx, nr_events);
3043 @@ -1254,8 +1351,12 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_e
3044         ret = PTR_ERR(ioctx);
3045         if (!IS_ERR(ioctx)) {
3046                 ret = put_user(ioctx->user_id, ctxp);
3047 -               if (!ret)
3048 -                       return 0;
3049 +#ifdef CONFIG_EPOLL
3050 +               if (make_fd && ret >= 0)
3051 +                       ret = make_aio_fd(ioctx);
3052 +#endif
3053 +               if (ret >= 0)
3054 +                       return ret;
3055  
3056                 get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */