- Update Xen patches to 2.6.36-rc3 and c/s 1029.
1 Subject: xen3 common
2 From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 1029:9b1adfb8b0b3)
3 Patch-mainline: n/a
4 Acked-by: jbeulich@novell.com
5
6 List of files that no longer require modification (and hence were
7 removed from this patch), kept for reference and in case upstream
8 wants to take the forward-porting patches:
9 2.6.22/include/linux/sched.h
10 2.6.22/kernel/softlockup.c
11 2.6.22/kernel/timer.c
12 2.6.25/mm/highmem.c
13 2.6.30/include/linux/pci_regs.h
14
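The core mechanism added below is an "external control" hook: the ACPI
processor driver forwards processor power-management and hotplug events
through processor_notify_external() to the ops registered in
processor_extcntl_ops instead of acting on them natively. As an
illustration only (not part of this patch), a hypervisor component could
hook in roughly as sketched here; the ops layout is inferred from its use
in processor_extcntl.c below, and the xen_* names are made up:

    /* Hypothetical sketch -- assumes <acpi/processor.h> with this patch applied. */
    static int xen_pm_notify(struct acpi_processor *pr, int event)
    {
            /* e.g. push the collected _PSS/_CST data for pr to the hypervisor */
            return 0;
    }

    static int xen_hotplug_notify(struct acpi_processor *pr, int type)
    {
            /* type is HOTPLUG_TYPE_ADD or HOTPLUG_TYPE_REMOVE */
            return 0;
    }

    static const struct processor_extcntl_ops xen_extcntl_ops = {
            .pm_ops[PM_TYPE_PERF] = xen_pm_notify,
            .pm_ops[PM_TYPE_IDLE] = xen_pm_notify,
            .hotplug               = xen_hotplug_notify,
    };

    /* arch_acpi_processor_init_extcntl() would then point
     * processor_extcntl_ops at &xen_extcntl_ops. */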
15 --- head-2010-08-30.orig/drivers/Makefile       2010-08-24 11:01:24.000000000 +0200
16 +++ head-2010-08-30/drivers/Makefile    2010-06-22 10:37:35.000000000 +0200
17 @@ -35,6 +35,7 @@ obj-$(CONFIG_PARPORT)         += parport/
18  obj-y                          += base/ block/ misc/ mfd/
19  obj-$(CONFIG_NUBUS)            += nubus/
20  obj-y                          += macintosh/
21 +obj-$(CONFIG_XEN)              += xen/
22  obj-$(CONFIG_IDE)              += ide/
23  obj-$(CONFIG_SCSI)             += scsi/
24  obj-$(CONFIG_ATA)              += ata/
25 --- head-2010-08-30.orig/drivers/acpi/Makefile  2010-08-24 11:00:24.000000000 +0200
26 +++ head-2010-08-30/drivers/acpi/Makefile       2010-08-24 11:33:11.000000000 +0200
27 @@ -67,6 +67,9 @@ obj-$(CONFIG_ACPI_EC_DEBUGFS) += ec_sys.
28  processor-y                    := processor_driver.o processor_throttling.o
29  processor-y                    += processor_idle.o processor_thermal.o
30  processor-$(CONFIG_CPU_FREQ)   += processor_perflib.o
31 +ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
32 +processor-objs += processor_perflib.o processor_extcntl.o
33 +endif
34  
35  obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
36  
37 --- head-2010-08-30.orig/drivers/acpi/acpica/hwsleep.c  2010-08-24 11:00:25.000000000 +0200
38 +++ head-2010-08-30/drivers/acpi/acpica/hwsleep.c       2010-06-22 10:37:35.000000000 +0200
39 @@ -236,7 +236,11 @@ acpi_status asmlinkage acpi_enter_sleep_
40         u32 pm1b_control;
41         struct acpi_bit_register_info *sleep_type_reg_info;
42         struct acpi_bit_register_info *sleep_enable_reg_info;
43 +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
44         u32 in_value;
45 +#else
46 +       int err;
47 +#endif
48         struct acpi_object_list arg_list;
49         union acpi_object arg;
50         acpi_status status;
51 @@ -347,6 +351,7 @@ acpi_status asmlinkage acpi_enter_sleep_
52  
53         /* Write #2: Write both SLP_TYP + SLP_EN */
54  
55 +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
56         status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control);
57         if (ACPI_FAILURE(status)) {
58                 return_ACPI_STATUS(status);
59 @@ -386,6 +391,16 @@ acpi_status asmlinkage acpi_enter_sleep_
60                 /* Spin until we wake */
61  
62         } while (!in_value);
63 +#else
64 +       /* PV ACPI just needs to check the hypercall return value */
65 +       err = acpi_notify_hypervisor_state(sleep_state,
66 +                       pm1a_control, pm1b_control);
67 +       if (err) {
68 +               ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
69 +                                 "Hypervisor failure [%d]\n", err));
70 +               return_ACPI_STATUS(AE_ERROR);
71 +       }
72 +#endif
73  
74         return_ACPI_STATUS(AE_OK);
75  }
76 --- head-2010-08-30.orig/drivers/acpi/processor_driver.c        2010-08-24 11:00:25.000000000 +0200
77 +++ head-2010-08-30/drivers/acpi/processor_driver.c     2010-08-24 11:33:22.000000000 +0200
78 @@ -364,7 +364,8 @@ static int acpi_processor_get_info(struc
79          */
80         if (pr->id == -1) {
81                 if (ACPI_FAILURE
82 -                   (acpi_processor_hotadd_init(pr->handle, &pr->id))) {
83 +                   (acpi_processor_hotadd_init(pr->handle, &pr->id)) &&
84 +                   !processor_cntl_external()) {
85                         return -ENODEV;
86                 }
87         }
88 @@ -415,7 +416,11 @@ static int acpi_processor_get_info(struc
89         return 0;
90  }
91  
92 +#ifndef CONFIG_XEN
93  static DEFINE_PER_CPU(void *, processor_device_array);
94 +#else
95 +static void *processor_device_array[NR_ACPI_CPUS];
96 +#endif
97  
98  static void acpi_processor_notify(struct acpi_device *device, u32 event)
99  {
100 @@ -496,8 +501,11 @@ static int __cpuinit acpi_processor_add(
101         strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS);
102         device->driver_data = pr;
103  
104 +       processor_extcntl_init();
105 +
106         result = acpi_processor_get_info(device);
107 -       if (result) {
108 +       if (result ||
109 +           ((pr->id == -1) && !processor_cntl_external())) {
110                 /* Processor is physically not present */
111                 return 0;
112         }
113 @@ -507,23 +515,36 @@ static int __cpuinit acpi_processor_add(
114                 return 0;
115  #endif
116  
117 -       BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0));
118 +       BUG_ON(!processor_cntl_external() &&
119 +              ((pr->id >= nr_cpu_ids) || (pr->id < 0)));
120  
121         /*
122          * Buggy BIOS check
123          * ACPI id of processors can be reported wrongly by the BIOS.
124          * Don't trust it blindly
125          */
126 +#ifndef CONFIG_XEN
127         if (per_cpu(processor_device_array, pr->id) != NULL &&
128             per_cpu(processor_device_array, pr->id) != device) {
129 +#else
130 +       BUG_ON(pr->acpi_id >= NR_ACPI_CPUS);
131 +       if (processor_device_array[pr->acpi_id] != NULL &&
132 +           processor_device_array[pr->acpi_id] != device) {
133 +#endif
134                 printk(KERN_WARNING "BIOS reported wrong ACPI id "
135                         "for the processor\n");
136                 result = -ENODEV;
137                 goto err_free_cpumask;
138         }
139 +#ifndef CONFIG_XEN
140         per_cpu(processor_device_array, pr->id) = device;
141  
142         per_cpu(processors, pr->id) = pr;
143 +#else
144 +       processor_device_array[pr->acpi_id] = device;
145 +       if (pr->id != -1)
146 +               per_cpu(processors, pr->id) = pr;
147 +#endif
148  
149         result = acpi_processor_add_fs(device);
150         if (result)
151 @@ -535,16 +556,28 @@ static int __cpuinit acpi_processor_add(
152                 goto err_remove_fs;
153         }
154  
155 -#ifdef CONFIG_CPU_FREQ
156 +#if defined(CONFIG_CPU_FREQ) || defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
157         acpi_processor_ppc_has_changed(pr, 0);
158  #endif
159 -       acpi_processor_get_throttling_info(pr);
160 -       acpi_processor_get_limit_info(pr);
161  
162 +       /*
163 +        * pr->id may be -1 while processor_cntl_external is enabled.
164 +        * The throttling and thermal modules don't support this case.
165 +        * Tx currently only works when the dom0 vcpu count equals the
166 +        * pcpu count, as we give control to dom0.
167 +        */
168 +       if (pr->id != -1) {
169 +               acpi_processor_get_throttling_info(pr);
170 +               acpi_processor_get_limit_info(pr);
171 +       }
172  
173         if (cpuidle_get_driver() == &acpi_idle_driver)
174                 acpi_processor_power_init(pr, device);
175  
176 +       result = processor_extcntl_prepare(pr);
177 +       if (result)
178 +               goto end;
179 +
180         pr->cdev = thermal_cooling_device_register("Processor", device,
181                                                 &processor_cooling_ops);
182         if (IS_ERR(pr->cdev)) {
183 @@ -596,7 +629,7 @@ static int acpi_processor_remove(struct 
184  
185         pr = acpi_driver_data(device);
186  
187 -       if (pr->id >= nr_cpu_ids)
188 +       if (!processor_cntl_external() && pr->id >= nr_cpu_ids)
189                 goto free;
190  
191         if (type == ACPI_BUS_REMOVAL_EJECT) {
192 @@ -617,8 +650,14 @@ static int acpi_processor_remove(struct 
193                 pr->cdev = NULL;
194         }
195  
196 +#ifndef CONFIG_XEN
197         per_cpu(processors, pr->id) = NULL;
198         per_cpu(processor_device_array, pr->id) = NULL;
199 +#else
200 +       if (pr->id != -1)
201 +               per_cpu(processors, pr->id) = NULL;
202 +       processor_device_array[pr->acpi_id] = NULL;
203 +#endif
204  
205  free:
206         free_cpumask_var(pr->throttling.shared_cpu_map);
207 @@ -674,6 +713,10 @@ int acpi_processor_device_add(acpi_handl
208                 return -ENODEV;
209         }
210  
211 +       if (processor_cntl_external() && acpi_driver_data(*device))
212 +               processor_notify_external(acpi_driver_data(*device),
213 +                       PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD);
214 +
215         return 0;
216  }
217  
218 @@ -703,6 +746,10 @@ static void __ref acpi_processor_hotplug
219                                             "Unable to add the device\n");
220                         break;
221                 }
222 +               pr = acpi_driver_data(device);
223 +               if (processor_cntl_external() && pr)
224 +                       processor_notify_external(pr,
225 +                                       PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD);
226                 break;
227         case ACPI_NOTIFY_EJECT_REQUEST:
228                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
229 @@ -719,6 +766,9 @@ static void __ref acpi_processor_hotplug
230                                     "Driver data is NULL, dropping EJECT\n");
231                         return;
232                 }
233 +               if (processor_cntl_external())
234 +                       processor_notify_external(pr, PROCESSOR_HOTPLUG,
235 +                                               HOTPLUG_TYPE_REMOVE);
236                 break;
237         default:
238                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
239 @@ -783,6 +833,11 @@ static acpi_status acpi_processor_hotadd
240  
241  static int acpi_processor_handle_eject(struct acpi_processor *pr)
242  {
243 +#ifdef CONFIG_XEN
244 +       if (pr->id == -1)
245 +               return (0);
246 +#endif
247 +
248         if (cpu_online(pr->id))
249                 cpu_down(pr->id);
250  
251 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
252 +++ head-2010-08-30/drivers/acpi/processor_extcntl.c    2010-06-22 10:37:35.000000000 +0200
253 @@ -0,0 +1,241 @@
254 +/*
255 + * processor_extcntl.c - channel to external control logic
256 + *
257 + *  Copyright (C) 2008, Intel corporation
258 + *
259 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
260 + *
261 + *  This program is free software; you can redistribute it and/or modify
262 + *  it under the terms of the GNU General Public License as published by
263 + *  the Free Software Foundation; either version 2 of the License, or (at
264 + *  your option) any later version.
265 + *
266 + *  This program is distributed in the hope that it will be useful, but
267 + *  WITHOUT ANY WARRANTY; without even the implied warranty of
268 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
269 + *  General Public License for more details.
270 + *
271 + *  You should have received a copy of the GNU General Public License along
272 + *  with this program; if not, write to the Free Software Foundation, Inc.,
273 + *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
274 + *
275 + */
276 +
277 +#include <linux/kernel.h>
278 +#include <linux/init.h>
279 +#include <linux/types.h>
280 +#include <linux/acpi.h>
281 +#include <linux/pm.h>
282 +#include <linux/cpu.h>
283 +
284 +#include <acpi/processor.h>
285 +
286 +#define ACPI_PROCESSOR_COMPONENT        0x01000000
287 +#define ACPI_PROCESSOR_CLASS            "processor"
288 +#define ACPI_PROCESSOR_DRIVER_NAME      "ACPI Processor Driver"
289 +#define _COMPONENT              ACPI_PROCESSOR_COMPONENT
290 +ACPI_MODULE_NAME("acpi_processor")
291 +
292 +static int processor_extcntl_parse_csd(struct acpi_processor *pr);
293 +static int processor_extcntl_get_performance(struct acpi_processor *pr);
294 +/*
295 + * External processor control logic may register its own set of
296 + * ops to receive ACPI-related notifications; a VMM is one example.
297 + */
298 +const struct processor_extcntl_ops *processor_extcntl_ops;
299 +EXPORT_SYMBOL(processor_extcntl_ops);
300 +
301 +static int processor_notify_smm(void)
302 +{
303 +       acpi_status status;
304 +       static int is_done = 0;
305 +
306 +       /* only need to notify the BIOS successfully once */
307 +       /* avoid double notification, which may lead to unexpected results */
308 +       if (is_done)
309 +               return 0;
310 +
311 +       /* Can't write pstate_cnt to smi_cmd if either value is zero */
312 +       if ((!acpi_fadt.smi_cmd) || (!acpi_fadt.pstate_cnt)) {
313 +               ACPI_DEBUG_PRINT((ACPI_DB_INFO,"No SMI port or pstate_cnt\n"));
314 +               return 0;
315 +       }
316 +
317 +       ACPI_DEBUG_PRINT((ACPI_DB_INFO,
318 +               "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n",
319 +               acpi_fadt.pstate_cnt, acpi_fadt.smi_cmd));
320 +
321 +       /* FADT v1 doesn't support pstate_cnt, but many BIOS vendors use
322 +        * it anyway, so we need to support it... */
323 +       if (acpi_fadt_is_v1) {
324 +               ACPI_DEBUG_PRINT((ACPI_DB_INFO,
325 +                       "Using v1.0 FADT reserved value for pstate_cnt\n"));
326 +       }
327 +
328 +       status = acpi_os_write_port(acpi_fadt.smi_cmd,
329 +                                   (u32) acpi_fadt.pstate_cnt, 8);
330 +       if (ACPI_FAILURE(status))
331 +               return status;
332 +
333 +       is_done = 1;
334 +
335 +       return 0;
336 +}
337 +
338 +int processor_notify_external(struct acpi_processor *pr, int event, int type)
339 +{
340 +       int ret = -EINVAL;
341 +
342 +       if (!processor_cntl_external())
343 +               return -EINVAL;
344 +
345 +       switch (event) {
346 +       case PROCESSOR_PM_INIT:
347 +       case PROCESSOR_PM_CHANGE:
348 +               if ((type >= PM_TYPE_MAX) ||
349 +                       !processor_extcntl_ops->pm_ops[type])
350 +                       break;
351 +
352 +               ret = processor_extcntl_ops->pm_ops[type](pr, event);
353 +               break;
354 +       case PROCESSOR_HOTPLUG:
355 +               if (processor_extcntl_ops->hotplug)
356 +                       ret = processor_extcntl_ops->hotplug(pr, type);
357 +               break;
358 +       default:
359 +               printk(KERN_ERR "Unsupported processor event %d.\n", event);
360 +               break;
361 +       }
362 +
363 +       return ret;
364 +}
365 +
366 +/*
367 + * External control logic can decide to take over all or part of the
368 + * physical processor control bits. Take a VMM for example: physical
369 + * processors are owned by the VMM, so existence information such as
370 + * hotplug events must always be forwarded to it. The same holds for
371 + * processor idle states, which are necessarily controlled by the VMM.
372 + * For other control bits like performance/throttle states, the VMM
373 + * may choose whether or not to control them based on its own policy.
374 + */
375 +void processor_extcntl_init(void)
376 +{
377 +       if (!processor_extcntl_ops)
378 +               arch_acpi_processor_init_extcntl(&processor_extcntl_ops);
379 +}
380 +
381 +/*
382 + * This is called from ACPI processor init, and is meant to hold
383 + * tricky housekeeping jobs needed to satisfy the external control
384 + * model. For example, dependency parsing stubs for idle and
385 + * performance states may go here, since that information may not be
386 + * available to control logic (like the cpufreq driver) split out of dom0.
387 + */
388 +int processor_extcntl_prepare(struct acpi_processor *pr)
389 +{
390 +       /* parse cstate dependency information */
391 +       if (processor_pm_external())
392 +               processor_extcntl_parse_csd(pr);
393 +
394 +       /* Initialize performance states */
395 +       if (processor_pmperf_external())
396 +               processor_extcntl_get_performance(pr);
397 +
398 +       return 0;
399 +}
400 +
401 +/*
402 + * Currently no _CSD is implemented, which is why the existing ACPI
403 + * code doesn't parse _CSD at all. But to keep the interface to the
404 + * external control logic complete, we put a placeholder here for
405 + * future compatibility.
406 + */
407 +static int processor_extcntl_parse_csd(struct acpi_processor *pr)
408 +{
409 +       int i;
410 +
411 +       for (i = 0; i < pr->power.count; i++) {
412 +               if (!pr->power.states[i].valid)
413 +                       continue;
414 +
415 +               /* No dependency by default */
416 +               pr->power.states[i].domain_info = NULL;
417 +               pr->power.states[i].csd_count = 0;
418 +       }
419 +
420 +       return 0;
421 +}
422 +
423 +/*
424 + * The existing ACPI module does parse performance states at some
425 + * point, when the acpi-cpufreq driver is loaded, but that is something
426 + * we'd like to disable to avoid conflicts with the external control
427 + * logic. So we have to collect the raw performance information here
428 + * when the ACPI processor object is found and started.
429 + */
430 +static int processor_extcntl_get_performance(struct acpi_processor *pr)
431 +{
432 +       int ret;
433 +       struct acpi_processor_performance *perf;
434 +       struct acpi_psd_package *pdomain;
435 +
436 +       if (pr->performance)
437 +               return -EBUSY;
438 +
439 +       perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL);
440 +       if (!perf)
441 +               return -ENOMEM;
442 +
443 +       pr->performance = perf;
444 +       /* Get basic performance state information */
445 +       ret = acpi_processor_get_performance_info(pr);
446 +       if (ret < 0)
447 +               goto err_out;
448 +
449 +       /*
450 +        * Here we need to retrieve the performance dependency information
451 +        * from the _PSD object. The existing interface is not used
452 +        * because it sticks to Linux cpu ids to construct its bitmaps,
453 +        * whereas we want to decouple ACPI processor objects from the
454 +        * Linux cpu id logic. For example, even when Linux is configured
455 +        * as UP, we still want to report all ACPI processor objects to
456 +        * the external logic. In this case it is preferable to use the
457 +        * ACPI ID instead.
458 +        */
459 +       pdomain = &pr->performance->domain_info;
460 +       pdomain->num_processors = 0;
461 +       ret = acpi_processor_get_psd(pr);
462 +       if (ret < 0) {
463 +               /*
464 +                * _PSD is optional - assume no coordination if absent (or
465 +                * broken), matching native kernels' behavior.
466 +                */
467 +               pdomain->num_entries = ACPI_PSD_REV0_ENTRIES;
468 +               pdomain->revision = ACPI_PSD_REV0_REVISION;
469 +               pdomain->domain = pr->acpi_id;
470 +               pdomain->coord_type = DOMAIN_COORD_TYPE_SW_ALL;
471 +               pdomain->num_processors = 1;
472 +       }
473 +
474 +       /* Some sanity check */
475 +       if ((pdomain->revision != ACPI_PSD_REV0_REVISION) ||
476 +           (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) ||
477 +           ((pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL) &&
478 +            (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY) &&
479 +            (pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL))) {
480 +               ret = -EINVAL;
481 +               goto err_out;
482 +       }
483 +
484 +       /* Last step is to notify BIOS that external logic exists */
485 +       processor_notify_smm();
486 +
487 +       processor_notify_external(pr, PROCESSOR_PM_INIT, PM_TYPE_PERF);
488 +
489 +       return 0;
490 +err_out:
491 +       pr->performance = NULL;
492 +       kfree(perf);
493 +       return ret;
494 +}
495 --- head-2010-08-30.orig/drivers/acpi/processor_idle.c  2010-08-24 11:00:25.000000000 +0200
496 +++ head-2010-08-30/drivers/acpi/processor_idle.c       2010-08-24 11:35:17.000000000 +0200
497 @@ -452,7 +452,8 @@ static int acpi_processor_get_power_info
498                                  */
499                                 cx.entry_method = ACPI_CSTATE_HALT;
500                                 snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
501 -                       } else {
502 +                       /* This doesn't apply to the external control case */
503 +                       } else if (!processor_pm_external()) {
504                                 continue;
505                         }
506                         if (cx.type == ACPI_STATE_C1 &&
507 @@ -491,6 +492,12 @@ static int acpi_processor_get_power_info
508  
509                 cx.power = obj->integer.value;
510  
511 +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
512 +               /* cache control methods to notify external logic */
513 +               if (processor_pm_external())
514 +                       memcpy(&cx.reg, reg, sizeof(*reg));
515 +#endif
516 +
517                 current_count++;
518                 memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));
519  
520 @@ -1138,6 +1145,11 @@ int __cpuinit acpi_processor_power_init(
521                 if (cpuidle_register_device(&pr->power.dev))
522                         return -EIO;
523         }
524 +
525 +       if (processor_pm_external())
526 +               processor_notify_external(pr,
527 +                       PROCESSOR_PM_INIT, PM_TYPE_IDLE);
528 +
529         return 0;
530  }
531  
532 --- head-2010-08-30.orig/drivers/acpi/processor_perflib.c       2010-05-16 23:17:36.000000000 +0200
533 +++ head-2010-08-30/drivers/acpi/processor_perflib.c    2010-06-22 10:37:35.000000000 +0200
534 @@ -79,6 +79,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the fre
535  
536  static int acpi_processor_ppc_status;
537  
538 +#ifdef CONFIG_CPU_FREQ
539  static int acpi_processor_ppc_notifier(struct notifier_block *nb,
540                                        unsigned long event, void *data)
541  {
542 @@ -121,6 +122,7 @@ static int acpi_processor_ppc_notifier(s
543  static struct notifier_block acpi_ppc_notifier_block = {
544         .notifier_call = acpi_processor_ppc_notifier,
545  };
546 +#endif /* CONFIG_CPU_FREQ */
547  
548  static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
549  {
550 @@ -209,7 +211,12 @@ int acpi_processor_ppc_has_changed(struc
551         if (ret < 0)
552                 return (ret);
553         else
554 +#ifdef CONFIG_CPU_FREQ
555                 return cpufreq_update_policy(pr->id);
556 +#elif defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
557 +               return processor_notify_external(pr,
558 +                               PROCESSOR_PM_CHANGE, PM_TYPE_PERF);
559 +#endif
560  }
561  
562  int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
563 @@ -225,6 +232,7 @@ int acpi_processor_get_bios_limit(int cp
564  }
565  EXPORT_SYMBOL(acpi_processor_get_bios_limit);
566  
567 +#ifdef CONFIG_CPU_FREQ
568  void acpi_processor_ppc_init(void)
569  {
570         if (!cpufreq_register_notifier
571 @@ -243,6 +251,7 @@ void acpi_processor_ppc_exit(void)
572  
573         acpi_processor_ppc_status &= ~PPC_REGISTERED;
574  }
575 +#endif /* CONFIG_CPU_FREQ */
576  
577  static int acpi_processor_get_performance_control(struct acpi_processor *pr)
578  {
579 @@ -390,7 +399,10 @@ static int acpi_processor_get_performanc
580         return result;
581  }
582  
583 -static int acpi_processor_get_performance_info(struct acpi_processor *pr)
584 +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
585 +static
586 +#endif
587 +int acpi_processor_get_performance_info(struct acpi_processor *pr)
588  {
589         int result = 0;
590         acpi_status status = AE_OK;
591 @@ -435,6 +447,7 @@ static int acpi_processor_get_performanc
592         return result;
593  }
594  
595 +#ifdef CONFIG_CPU_FREQ
596  int acpi_processor_notify_smm(struct module *calling_module)
597  {
598         acpi_status status;
599 @@ -495,8 +508,12 @@ int acpi_processor_notify_smm(struct mod
600  }
601  
602  EXPORT_SYMBOL(acpi_processor_notify_smm);
603 +#endif /* CONFIG_CPU_FREQ */
604  
605 -static int acpi_processor_get_psd(struct acpi_processor        *pr)
606 +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
607 +static
608 +#endif
609 +int acpi_processor_get_psd(struct acpi_processor *pr)
610  {
611         int result = 0;
612         acpi_status status = AE_OK;
613 --- head-2010-08-30.orig/drivers/acpi/sleep.c   2010-08-24 11:00:25.000000000 +0200
614 +++ head-2010-08-30/drivers/acpi/sleep.c        2010-08-24 11:37:35.000000000 +0200
615 @@ -60,6 +60,7 @@ static struct notifier_block tts_notifie
616  static int acpi_sleep_prepare(u32 acpi_state)
617  {
618  #ifdef CONFIG_ACPI_SLEEP
619 +#ifndef CONFIG_ACPI_PV_SLEEP
620         /* do we have a wakeup address for S2 and S3? */
621         if (acpi_state == ACPI_STATE_S3) {
622                 if (!acpi_wakeup_address) {
623 @@ -69,6 +70,7 @@ static int acpi_sleep_prepare(u32 acpi_s
624                                 (acpi_physical_address)acpi_wakeup_address);
625  
626         }
627 +#endif
628         ACPI_FLUSH_CPU_CACHE();
629  #endif
630         printk(KERN_INFO PREFIX "Preparing to enter system sleep state S%d\n",
631 @@ -265,7 +267,14 @@ static int acpi_suspend_enter(suspend_st
632                 break;
633  
634         case ACPI_STATE_S3:
635 +#ifdef CONFIG_ACPI_PV_SLEEP
636 +               /* The hypervisor will save and restore the CPU context,
637 +                * so we can skip the low-level housekeeping here.
638 +                */
639 +               acpi_enter_sleep_state(acpi_state);
640 +#else
641                 do_suspend_lowlevel();
642 +#endif
643                 break;
644         }
645  
646 --- head-2010-08-30.orig/drivers/char/agp/intel-gtt.c   2010-08-24 11:01:20.000000000 +0200
647 +++ head-2010-08-30/drivers/char/agp/intel-gtt.c        2010-08-24 11:37:52.000000000 +0200
648 @@ -285,6 +285,13 @@ static struct page *i8xx_alloc_pages(voi
649         if (page == NULL)
650                 return NULL;
651  
652 +#ifdef CONFIG_XEN
653 +       if (xen_create_contiguous_region((unsigned long)page_address(page), 2, 32)) {
654 +               __free_pages(page, 2);
655 +               return NULL;
656 +       }
657 +#endif
658 +
659         if (set_pages_uc(page, 4) < 0) {
660                 set_pages_wb(page, 4);
661                 __free_pages(page, 2);
662 @@ -301,6 +308,9 @@ static void i8xx_destroy_pages(struct pa
663                 return;
664  
665         set_pages_wb(page, 4);
666 +#ifdef CONFIG_XEN
667 +       xen_destroy_contiguous_region((unsigned long)page_address(page), 2);
668 +#endif
669         put_page(page);
670         __free_pages(page, 2);
671         atomic_dec(&agp_bridge->current_memory_agp);
672 --- head-2010-08-30.orig/drivers/char/mem.c     2010-08-24 11:01:19.000000000 +0200
673 +++ head-2010-08-30/drivers/char/mem.c  2010-06-22 10:37:35.000000000 +0200
674 @@ -89,6 +89,7 @@ void __weak unxlate_dev_mem_ptr(unsigned
675  {
676  }
677  
678 +#ifndef ARCH_HAS_DEV_MEM
679  /*
680   * This funcion reads the *physical* memory. The f_pos points directly to the
681   * memory location.
682 @@ -211,6 +212,7 @@ static ssize_t write_mem(struct file *fi
683         *ppos += written;
684         return written;
685  }
686 +#endif
687  
688  int __weak phys_mem_access_prot_allowed(struct file *file,
689         unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
690 @@ -337,6 +339,9 @@ static int mmap_mem(struct file *file, s
691  static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
692  {
693         unsigned long pfn;
694 +#ifdef CONFIG_XEN
695 +       unsigned long i, count;
696 +#endif
697  
698         /* Turn a kernel-virtual address into a physical page frame */
699         pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
700 @@ -351,6 +356,13 @@ static int mmap_kmem(struct file *file, 
701         if (!pfn_valid(pfn))
702                 return -EIO;
703  
704 +#ifdef CONFIG_XEN
705 +       count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
706 +       for (i = 0; i < count; i++)
707 +               if ((pfn + i) != mfn_to_local_pfn(pfn_to_mfn(pfn + i)))
708 +                       return -EIO;
709 +#endif
710 +
711         vma->vm_pgoff = pfn;
712         return mmap_mem(file, vma);
713  }
714 @@ -845,6 +857,7 @@ static int open_port(struct inode * inod
715  #define open_kmem      open_mem
716  #define open_oldmem    open_mem
717  
718 +#ifndef ARCH_HAS_DEV_MEM
719  static const struct file_operations mem_fops = {
720         .llseek         = memory_lseek,
721         .read           = read_mem,
722 @@ -853,6 +866,9 @@ static const struct file_operations mem_
723         .open           = open_mem,
724         .get_unmapped_area = get_unmapped_area_mem,
725  };
726 +#else
727 +extern const struct file_operations mem_fops;
728 +#endif
729  
730  #ifdef CONFIG_DEVKMEM
731  static const struct file_operations kmem_fops = {
732 --- head-2010-08-30.orig/drivers/char/tpm/Makefile      2006-09-20 05:42:06.000000000 +0200
733 +++ head-2010-08-30/drivers/char/tpm/Makefile   2010-06-22 10:37:35.000000000 +0200
734 @@ -9,3 +9,5 @@ obj-$(CONFIG_TCG_TIS) += tpm_tis.o
735  obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
736  obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
737  obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
738 +obj-$(CONFIG_TCG_XEN) += tpm_xenu.o
739 +tpm_xenu-y = tpm_xen.o tpm_vtpm.o
740 --- head-2010-08-30.orig/drivers/char/tpm/tpm.h 2010-08-02 00:11:14.000000000 +0200
741 +++ head-2010-08-30/drivers/char/tpm/tpm.h      2010-07-07 13:26:26.000000000 +0200
742 @@ -108,6 +108,9 @@ struct tpm_chip {
743         struct dentry **bios_dir;
744  
745         struct list_head list;
746 +#ifdef CONFIG_XEN
747 +       void *priv;
748 +#endif
749         void (*release) (struct device *);
750  };
751  
752 @@ -267,6 +270,18 @@ struct tpm_cmd_t {
753  
754  ssize_t        tpm_getcap(struct device *, __be32, cap_t *, const char *);
755  
756 +#ifdef CONFIG_XEN
757 +static inline void *chip_get_private(const struct tpm_chip *chip)
758 +{
759 +       return chip->priv;
760 +}
761 +
762 +static inline void chip_set_private(struct tpm_chip *chip, void *priv)
763 +{
764 +       chip->priv = priv;
765 +}
766 +#endif
767 +
768  extern void tpm_get_timeouts(struct tpm_chip *);
769  extern void tpm_gen_interrupt(struct tpm_chip *);
770  extern void tpm_continue_selftest(struct tpm_chip *);
771 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
772 +++ head-2010-08-30/drivers/char/tpm/tpm_vtpm.c 2010-06-22 10:37:35.000000000 +0200
773 @@ -0,0 +1,542 @@
774 +/*
775 + * Copyright (C) 2006 IBM Corporation
776 + *
777 + * Authors:
778 + * Stefan Berger <stefanb@us.ibm.com>
779 + *
780 + * Generic device driver part for device drivers in a virtualized
781 + * environment.
782 + *
783 + * This program is free software; you can redistribute it and/or
784 + * modify it under the terms of the GNU General Public License as
785 + * published by the Free Software Foundation, version 2 of the
786 + * License.
787 + *
788 + */
789 +
790 +#include <asm/uaccess.h>
791 +#include <linux/list.h>
792 +#include <linux/device.h>
793 +#include <linux/interrupt.h>
794 +#include <linux/platform_device.h>
795 +#include "tpm.h"
796 +#include "tpm_vtpm.h"
797 +
798 +/* read status bits */
799 +enum {
800 +       STATUS_BUSY = 0x01,
801 +       STATUS_DATA_AVAIL = 0x02,
802 +       STATUS_READY = 0x04
803 +};
804 +
805 +struct transmission {
806 +       struct list_head next;
807 +
808 +       unsigned char *request;
809 +       size_t  request_len;
810 +       size_t  request_buflen;
811 +
812 +       unsigned char *response;
813 +       size_t  response_len;
814 +       size_t  response_buflen;
815 +
816 +       unsigned int flags;
817 +};
818 +
819 +enum {
820 +       TRANSMISSION_FLAG_WAS_QUEUED = 0x1
821 +};
822 +
823 +
824 +enum {
825 +       DATAEX_FLAG_QUEUED_ONLY = 0x1
826 +};
827 +
828 +
829 +/* local variables */
830 +
831 +/* local function prototypes */
832 +static int _vtpm_send_queued(struct tpm_chip *chip);
833 +
834 +
835 +/* =============================================================
836 + * Some utility functions
837 + * =============================================================
838 + */
839 +static void vtpm_state_init(struct vtpm_state *vtpms)
840 +{
841 +       vtpms->current_request = NULL;
842 +       spin_lock_init(&vtpms->req_list_lock);
843 +       init_waitqueue_head(&vtpms->req_wait_queue);
844 +       INIT_LIST_HEAD(&vtpms->queued_requests);
845 +
846 +       vtpms->current_response = NULL;
847 +       spin_lock_init(&vtpms->resp_list_lock);
848 +       init_waitqueue_head(&vtpms->resp_wait_queue);
849 +
850 +       vtpms->disconnect_time = jiffies;
851 +}
852 +
853 +
854 +static inline struct transmission *transmission_alloc(void)
855 +{
856 +       return kzalloc(sizeof(struct transmission), GFP_ATOMIC);
857 +}
858 +
859 +static unsigned char *
860 +transmission_set_req_buffer(struct transmission *t,
861 +                            unsigned char *buffer, size_t len)
862 +{
863 +       if (t->request_buflen < len) {
864 +               kfree(t->request);
865 +               t->request = kmalloc(len, GFP_KERNEL);
866 +               if (!t->request) {
867 +                       t->request_buflen = 0;
868 +                       return NULL;
869 +               }
870 +               t->request_buflen = len;
871 +       }
872 +
873 +       memcpy(t->request, buffer, len);
874 +       t->request_len = len;
875 +
876 +       return t->request;
877 +}
878 +
879 +static unsigned char *
880 +transmission_set_res_buffer(struct transmission *t,
881 +                            const unsigned char *buffer, size_t len)
882 +{
883 +       if (t->response_buflen < len) {
884 +               kfree(t->response);
885 +               t->response = kmalloc(len, GFP_ATOMIC);
886 +               if (!t->response) {
887 +                       t->response_buflen = 0;
888 +                       return NULL;
889 +               }
890 +               t->response_buflen = len;
891 +       }
892 +
893 +       memcpy(t->response, buffer, len);
894 +       t->response_len = len;
895 +
896 +       return t->response;
897 +}
898 +
899 +static inline void transmission_free(struct transmission *t)
900 +{
901 +       kfree(t->request);
902 +       kfree(t->response);
903 +       kfree(t);
904 +}
905 +
906 +/* =============================================================
907 + * Interface with the lower layer driver
908 + * =============================================================
909 + */
910 +/*
911 + * Lower layer uses this function to make a response available.
912 + */
913 +int vtpm_vd_recv(const struct tpm_chip *chip,
914 +                 const unsigned char *buffer, size_t count,
915 +                 void *ptr)
916 +{
917 +       unsigned long flags;
918 +       int ret_size = 0;
919 +       struct transmission *t;
920 +       struct vtpm_state *vtpms;
921 +
922 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
923 +
924 +       /*
925 +        * The request list must contain exactly one request,
926 +        * and that element must be the one that was passed
927 +        * in from the front-end.
928 +        */
929 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
930 +       if (vtpms->current_request != ptr) {
931 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
932 +               return 0;
933 +       }
934 +
935 +       if ((t = vtpms->current_request)) {
936 +               transmission_free(t);
937 +               vtpms->current_request = NULL;
938 +       }
939 +
940 +       t = transmission_alloc();
941 +       if (t) {
942 +               if (!transmission_set_res_buffer(t, buffer, count)) {
943 +                       transmission_free(t);
944 +                       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
945 +                       return -ENOMEM;
946 +               }
947 +               ret_size = count;
948 +               vtpms->current_response = t;
949 +               wake_up_interruptible(&vtpms->resp_wait_queue);
950 +       }
951 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
952 +
953 +       return ret_size;
954 +}
955 +
956 +
957 +/*
958 + * Lower layer indicates its status (connected/disconnected)
959 + */
960 +void vtpm_vd_status(const struct tpm_chip *chip, u8 vd_status)
961 +{
962 +       struct vtpm_state *vtpms;
963 +
964 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
965 +
966 +       vtpms->vd_status = vd_status;
967 +       if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
968 +               vtpms->disconnect_time = jiffies;
969 +       }
970 +}
971 +
972 +/* =============================================================
973 + * Interface with the generic TPM driver
974 + * =============================================================
975 + */
976 +static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
977 +{
978 +       int rc = 0;
979 +       unsigned long flags;
980 +       struct vtpm_state *vtpms;
981 +
982 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
983 +
984 +       /*
985 +        * Check if the previous operation only queued the command.
986 +        * In that case there won't be a response, so just
987 +        * return from here and reset that flag. In any other
988 +        * case we should receive a response from the back-end.
989 +        */
990 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
991 +       if ((vtpms->flags & DATAEX_FLAG_QUEUED_ONLY) != 0) {
992 +               vtpms->flags &= ~DATAEX_FLAG_QUEUED_ONLY;
993 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
994 +               /*
995 +                * The first few commands (measurements) must be
996 +                * queued since it might not be possible to talk to the
997 +                * TPM, yet.
998 +                * Return a response of up to 30 '0's.
999 +                */
1000 +
1001 +               count = min_t(size_t, count, 30);
1002 +               memset(buf, 0x0, count);
1003 +               return count;
1004 +       }
1005 +       /*
1006 +        * Check whether something is in the response list and, if
1007 +        * there's nothing in the list, wait for something to appear.
1008 +        */
1009 +
1010 +       if (!vtpms->current_response) {
1011 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1012 +               interruptible_sleep_on_timeout(&vtpms->resp_wait_queue,
1013 +                                              1000);
1014 +               spin_lock_irqsave(&vtpms->resp_list_lock ,flags);
1015 +       }
1016 +
1017 +       if (vtpms->current_response) {
1018 +               struct transmission *t = vtpms->current_response;
1019 +               vtpms->current_response = NULL;
1020 +               rc = min(count, t->response_len);
1021 +               memcpy(buf, t->response, rc);
1022 +               transmission_free(t);
1023 +       }
1024 +
1025 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1026 +       return rc;
1027 +}
1028 +
1029 +static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
1030 +{
1031 +       int rc = 0;
1032 +       unsigned long flags;
1033 +       struct transmission *t = transmission_alloc();
1034 +       struct vtpm_state *vtpms;
1035 +
1036 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1037 +
1038 +       if (!t)
1039 +               return -ENOMEM;
1040 +       /*
1041 +        * If there's a current request, it must be the
1042 +        * previous request that has timed out.
1043 +        */
1044 +       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1045 +       if (vtpms->current_request != NULL) {
1046 +               printk("WARNING: Sending although there is a request outstanding.\n"
1047 +                      "         Previous request must have timed out.\n");
1048 +               transmission_free(vtpms->current_request);
1049 +               vtpms->current_request = NULL;
1050 +       }
1051 +       spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1052 +
1053 +       /*
1054 +        * Queue the packet if the driver below is not
1055 +        * ready, yet, or there is any packet already
1056 +        * in the queue.
1057 +        * If the driver below is ready, unqueue all
1058 +        * packets first before sending our current
1059 +        * packet.
1060 +        * For each unqueued packet, except for the
1061 +        * last (=current) packet, call the function
1062 +        * tpm_xen_recv to wait for the response to come
1063 +        * back.
1064 +        */
1065 +       if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
1066 +               if (time_after(jiffies,
1067 +                              vtpms->disconnect_time + HZ * 10)) {
1068 +                       rc = -ENOENT;
1069 +               } else {
1070 +                       goto queue_it;
1071 +               }
1072 +       } else {
1073 +               /*
1074 +                * Send all queued packets.
1075 +                */
1076 +               if (_vtpm_send_queued(chip) == 0) {
1077 +
1078 +                       vtpms->current_request = t;
1079 +
1080 +                       rc = vtpm_vd_send(vtpms->tpm_private,
1081 +                                         buf,
1082 +                                         count,
1083 +                                         t);
1084 +                       /*
1085 +                        * The generic TPM driver will call
1086 +                        * the function to receive the response.
1087 +                        */
1088 +                       if (rc < 0) {
1089 +                               vtpms->current_request = NULL;
1090 +                               goto queue_it;
1091 +                       }
1092 +               } else {
1093 +queue_it:
1094 +                       if (!transmission_set_req_buffer(t, buf, count)) {
1095 +                               transmission_free(t);
1096 +                               rc = -ENOMEM;
1097 +                               goto exit;
1098 +                       }
1099 +                       /*
1100 +                        * An error occurred. Don't even try
1101 +                        * to send the current request. Just
1102 +                        * queue it.
1103 +                        */
1104 +                       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1105 +                       vtpms->flags |= DATAEX_FLAG_QUEUED_ONLY;
1106 +                       list_add_tail(&t->next, &vtpms->queued_requests);
1107 +                       spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1108 +               }
1109 +       }
1110 +
1111 +exit:
1112 +       return rc;
1113 +}
1114 +
1115 +
1116 +/*
1117 + * Send all queued requests.
1118 + */
1119 +static int _vtpm_send_queued(struct tpm_chip *chip)
1120 +{
1121 +       int rc;
1122 +       int error = 0;
1123 +       long flags;
1124 +       unsigned char buffer[1];
1125 +       struct vtpm_state *vtpms;
1126 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1127 +
1128 +       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1129 +
1130 +       while (!list_empty(&vtpms->queued_requests)) {
1131 +               /*
1132 +                * Need to dequeue them.
1133 +                * Read the result into a dummy buffer.
1134 +                */
1135 +               struct transmission *qt = (struct transmission *)
1136 +                                         vtpms->queued_requests.next;
1137 +               list_del(&qt->next);
1138 +               vtpms->current_request = qt;
1139 +               spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1140 +
1141 +               rc = vtpm_vd_send(vtpms->tpm_private,
1142 +                                 qt->request,
1143 +                                 qt->request_len,
1144 +                                 qt);
1145 +
1146 +               if (rc < 0) {
1147 +                       spin_lock_irqsave(&vtpms->req_list_lock, flags);
1148 +                       if ((qt = vtpms->current_request) != NULL) {
1149 +                               /*
1150 +                                * requeue it at the beginning
1151 +                                * of the list
1152 +                                */
1153 +                               list_add(&qt->next,
1154 +                                        &vtpms->queued_requests);
1155 +                       }
1156 +                       vtpms->current_request = NULL;
1157 +                       error = 1;
1158 +                       break;
1159 +               }
1160 +               /*
1161 +                * After this point qt is not valid anymore!
1162 +                * It is freed when the front-end is delivering
1163 +                * the data by calling tpm_recv
1164 +                */
1165 +               /*
1166 +                * Receive response into provided dummy buffer
1167 +                */
1168 +               rc = vtpm_recv(chip, buffer, sizeof(buffer));
1169 +               spin_lock_irqsave(&vtpms->req_list_lock, flags);
1170 +       }
1171 +
1172 +       spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1173 +
1174 +       return error;
1175 +}
1176 +
1177 +static void vtpm_cancel(struct tpm_chip *chip)
1178 +{
1179 +       unsigned long flags;
1180 +       struct vtpm_state *vtpms = (struct vtpm_state *)chip_get_private(chip);
1181 +
1182 +       spin_lock_irqsave(&vtpms->resp_list_lock,flags);
1183 +
1184 +       if (!vtpms->current_response && vtpms->current_request) {
1185 +               spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1186 +               interruptible_sleep_on(&vtpms->resp_wait_queue);
1187 +               spin_lock_irqsave(&vtpms->resp_list_lock,flags);
1188 +       }
1189 +
1190 +       if (vtpms->current_response) {
1191 +               struct transmission *t = vtpms->current_response;
1192 +               vtpms->current_response = NULL;
1193 +               transmission_free(t);
1194 +       }
1195 +
1196 +       spin_unlock_irqrestore(&vtpms->resp_list_lock,flags);
1197 +}
1198 +
1199 +static u8 vtpm_status(struct tpm_chip *chip)
1200 +{
1201 +       u8 rc = 0;
1202 +       unsigned long flags;
1203 +       struct vtpm_state *vtpms;
1204 +
1205 +       vtpms = (struct vtpm_state *)chip_get_private(chip);
1206 +
1207 +       spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1208 +       /*
1209 +        * Data are available if:
1210 +        *  - there's a current response
1211 +        *  - the last packet was queued only (this is fake, but necessary to
1212 +        *      get the generic TPM layer to call the receive function.)
1213 +        */
1214 +       if (vtpms->current_response ||
1215 +           0 != (vtpms->flags & DATAEX_FLAG_QUEUED_ONLY)) {
1216 +               rc = STATUS_DATA_AVAIL;
1217 +       } else if (!vtpms->current_response && !vtpms->current_request) {
1218 +               rc = STATUS_READY;
1219 +       }
1220 +
1221 +       spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1222 +       return rc;
1223 +}
1224 +
1225 +static struct file_operations vtpm_ops = {
1226 +       .owner = THIS_MODULE,
1227 +       .llseek = no_llseek,
1228 +       .open = tpm_open,
1229 +       .read = tpm_read,
1230 +       .write = tpm_write,
1231 +       .release = tpm_release,
1232 +};
1233 +
1234 +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
1235 +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
1236 +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
1237 +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
1238 +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
1239 +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
1240 +                  NULL);
1241 +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
1242 +static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel);
1243 +
1244 +static struct attribute *vtpm_attrs[] = {
1245 +       &dev_attr_pubek.attr,
1246 +       &dev_attr_pcrs.attr,
1247 +       &dev_attr_enabled.attr,
1248 +       &dev_attr_active.attr,
1249 +       &dev_attr_owned.attr,
1250 +       &dev_attr_temp_deactivated.attr,
1251 +       &dev_attr_caps.attr,
1252 +       &dev_attr_cancel.attr,
1253 +       NULL,
1254 +};
1255 +
1256 +static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs };
1257 +
1258 +#define TPM_LONG_TIMEOUT   (10 * 60 * HZ)
1259 +
1260 +static struct tpm_vendor_specific tpm_vtpm = {
1261 +       .recv = vtpm_recv,
1262 +       .send = vtpm_send,
1263 +       .cancel = vtpm_cancel,
1264 +       .status = vtpm_status,
1265 +       .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL,
1266 +       .req_complete_val  = STATUS_DATA_AVAIL,
1267 +       .req_canceled = STATUS_READY,
1268 +       .attr_group = &vtpm_attr_grp,
1269 +       .miscdev = {
1270 +               .fops = &vtpm_ops,
1271 +       },
1272 +       .duration = {
1273 +               TPM_LONG_TIMEOUT,
1274 +               TPM_LONG_TIMEOUT,
1275 +               TPM_LONG_TIMEOUT,
1276 +       },
1277 +};
1278 +
1279 +struct tpm_chip *init_vtpm(struct device *dev,
1280 +                           struct tpm_private *tp)
1281 +{
1282 +       long rc;
1283 +       struct tpm_chip *chip;
1284 +       struct vtpm_state *vtpms;
1285 +
1286 +       vtpms = kzalloc(sizeof(struct vtpm_state), GFP_KERNEL);
1287 +       if (!vtpms)
1288 +               return ERR_PTR(-ENOMEM);
1289 +
1290 +       vtpm_state_init(vtpms);
1291 +       vtpms->tpm_private = tp;
1292 +
1293 +       chip = tpm_register_hardware(dev, &tpm_vtpm);
1294 +       if (!chip) {
1295 +               rc = -ENODEV;
1296 +               goto err_free_mem;
1297 +       }
1298 +
1299 +       chip_set_private(chip, vtpms);
1300 +
1301 +       return chip;
1302 +
1303 +err_free_mem:
1304 +       kfree(vtpms);
1305 +
1306 +       return ERR_PTR(rc);
1307 +}
1308 +
1309 +void cleanup_vtpm(struct device *dev)
1310 +{
1311 +       struct tpm_chip *chip = dev_get_drvdata(dev);
1312 +       struct vtpm_state *vtpms = (struct vtpm_state*)chip_get_private(chip);
1313 +       tpm_remove_hardware(dev);
1314 +       kfree(vtpms);
1315 +}
1316 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1317 +++ head-2010-08-30/drivers/char/tpm/tpm_vtpm.h 2010-06-22 10:37:35.000000000 +0200
1318 @@ -0,0 +1,55 @@
1319 +#ifndef TPM_VTPM_H
1320 +#define TPM_VTPM_H
1321 +
1322 +struct tpm_chip;
1323 +struct tpm_private;
1324 +
1325 +struct vtpm_state {
1326 +       struct transmission *current_request;
1327 +       spinlock_t           req_list_lock;
1328 +       wait_queue_head_t    req_wait_queue;
1329 +
1330 +       struct list_head     queued_requests;
1331 +
1332 +       struct transmission *current_response;
1333 +       spinlock_t           resp_list_lock;
1334 +       wait_queue_head_t    resp_wait_queue;     // processes waiting for responses
1335 +
1336 +       u8                   vd_status;
1337 +       u8                   flags;
1338 +
1339 +       unsigned long        disconnect_time;
1340 +
1341 +       /*
1342 +        * The following is a private structure of the underlying
1343 +        * driver. It is passed as parameter in the send function.
1344 +        */
1345 +       struct tpm_private *tpm_private;
1346 +};
1347 +
1348 +
1349 +enum vdev_status {
1350 +       TPM_VD_STATUS_DISCONNECTED = 0x0,
1351 +       TPM_VD_STATUS_CONNECTED = 0x1
1352 +};
1353 +
1354 +/* this function is called from tpm_vtpm.c */
1355 +int vtpm_vd_send(struct tpm_private * tp,
1356 +                 const u8 * buf, size_t count, void *ptr);
1357 +
1358 +/* these functions are offered by tpm_vtpm.c */
1359 +struct tpm_chip *init_vtpm(struct device *,
1360 +                           struct tpm_private *);
1361 +void cleanup_vtpm(struct device *);
1362 +int vtpm_vd_recv(const struct tpm_chip* chip,
1363 +                 const unsigned char *buffer, size_t count, void *ptr);
1364 +void vtpm_vd_status(const struct tpm_chip *, u8 status);
1365 +
1366 +static inline struct tpm_private *tpm_private_from_dev(struct device *dev)
1367 +{
1368 +       struct tpm_chip *chip = dev_get_drvdata(dev);
1369 +       struct vtpm_state *vtpms = chip_get_private(chip);
1370 +       return vtpms->tpm_private;
1371 +}
1372 +
1373 +#endif
1374 --- /dev/null   1970-01-01 00:00:00.000000000 +0000
1375 +++ head-2010-08-30/drivers/char/tpm/tpm_xen.c  2010-06-22 10:37:35.000000000 +0200
1376 @@ -0,0 +1,722 @@
1377 +/*
1378 + * Copyright (c) 2005, IBM Corporation
1379 + *
1380 + * Author: Stefan Berger, stefanb@us.ibm.com
1381 + * Grant table support: Mahadevan Gomathisankaran
1382 + *
1383 + * This code has been derived from drivers/xen/netfront/netfront.c
1384 + *
1385 + * Copyright (c) 2002-2004, K A Fraser
1386 + *
1387 + * This program is free software; you can redistribute it and/or
1388 + * modify it under the terms of the GNU General Public License version 2
1389 + * as published by the Free Software Foundation; or, when distributed
1390 + * separately from the Linux kernel or incorporated into other
1391 + * software packages, subject to the following license:
1392 + *
1393 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1394 + * of this source file (the "Software"), to deal in the Software without
1395 + * restriction, including without limitation the rights to use, copy, modify,
1396 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1397 + * and to permit persons to whom the Software is furnished to do so, subject to
1398 + * the following conditions:
1399 + *
1400 + * The above copyright notice and this permission notice shall be included in
1401 + * all copies or substantial portions of the Software.
1402 + *
1403 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1404 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1405 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1406 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1407 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1408 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1409 + * IN THE SOFTWARE.
1410 + */
1411 +
1412 +#include <linux/errno.h>
1413 +#include <linux/err.h>
1414 +#include <linux/interrupt.h>
1415 +#include <linux/mutex.h>
1416 +#include <asm/uaccess.h>
1417 +#include <xen/evtchn.h>
1418 +#include <xen/interface/grant_table.h>
1419 +#include <xen/interface/io/tpmif.h>
1420 +#include <xen/gnttab.h>
1421 +#include <xen/xenbus.h>
1422 +#include "tpm.h"
1423 +#include "tpm_vtpm.h"
1424 +
1425 +#undef DEBUG
1426 +
1427 +/* local structures */
1428 +struct tpm_private {
1429 +       struct tpm_chip *chip;
1430 +
1431 +       tpmif_tx_interface_t *tx;
1432 +       atomic_t refcnt;
1433 +       unsigned int irq;
1434 +       u8 is_connected;
1435 +       u8 is_suspended;
1436 +
1437 +       spinlock_t tx_lock;
1438 +
1439 +       struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE];
1440 +
1441 +       atomic_t tx_busy;
1442 +       void *tx_remember;
1443 +
1444 +       domid_t backend_id;
1445 +       wait_queue_head_t wait_q;
1446 +
1447 +       struct xenbus_device *dev;
1448 +       int ring_ref;
1449 +};
1450 +
1451 +struct tx_buffer {
1452 +       unsigned int size;      // available space in data
1453 +       unsigned int len;       // used space in data
1454 +       unsigned char *data;    // pointer to a page
1455 +};
1456 +
1457 +
1458 +/* locally visible variables */
1459 +static grant_ref_t gref_head;
1460 +static struct tpm_private *my_priv;
1461 +
1462 +/* local function prototypes */
1463 +static irqreturn_t tpmif_int(int irq,
1464 +                             void *tpm_priv,
1465 +                             struct pt_regs *ptregs);
1466 +static void tpmif_rx_action(unsigned long unused);
1467 +static int tpmif_connect(struct xenbus_device *dev,
1468 +                         struct tpm_private *tp,
1469 +                         domid_t domid);
1470 +static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0);
1471 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp);
1472 +static void tpmif_free_tx_buffers(struct tpm_private *tp);
1473 +static void tpmif_set_connected_state(struct tpm_private *tp,
1474 +                                      u8 newstate);
1475 +static int tpm_xmit(struct tpm_private *tp,
1476 +                    const u8 * buf, size_t count, int userbuffer,
1477 +                    void *remember);
1478 +static void destroy_tpmring(struct tpm_private *tp);
1479 +void __exit tpmif_exit(void);
1480 +
1481 +#define DPRINTK(fmt, args...) \
1482 +    pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
1483 +#define IPRINTK(fmt, args...) \
1484 +    printk(KERN_INFO "xen_tpm_fr: " fmt, ##args)
1485 +#define WPRINTK(fmt, args...) \
1486 +    printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args)
1487 +
1488 +#define GRANT_INVALID_REF      0
1489 +
1490 +
1491 +static inline int
1492 +tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len,
1493 +               int isuserbuffer)
1494 +{
1495 +       int copied = len;
1496 +
1497 +       if (len > txb->size)
1498 +               copied = txb->size;
1499 +       if (isuserbuffer) {
1500 +               if (copy_from_user(txb->data, src, copied))
1501 +                       return -EFAULT;
1502 +       } else {
1503 +               memcpy(txb->data, src, copied);
1504 +       }
1505 +       txb->len = len;
1506 +       return copied;
1507 +}
1508 +
1509 +static inline struct tx_buffer *tx_buffer_alloc(void)
1510 +{
1511 +       struct tx_buffer *txb;
1512 +
1513 +       txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL);
1514 +       if (!txb)
1515 +               return NULL;
1516 +
1517 +       txb->len = 0;
1518 +       txb->size = PAGE_SIZE;
1519 +       txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
1520 +       if (txb->data == NULL) {
1521 +               kfree(txb);
1522 +               txb = NULL;
1523 +       }
1524 +
1525 +       return txb;
1526 +}
1527 +
1528 +
1529 +static inline void tx_buffer_free(struct tx_buffer *txb)
1530 +{
1531 +       if (txb) {
1532 +               free_page((long)txb->data);
1533 +               kfree(txb);
1534 +       }
1535 +}
1536 +
1537 +/**************************************************************
1538 + Utility function for the tpm_private structure
1539 +**************************************************************/
1540 +static void tpm_private_init(struct tpm_private *tp)
1541 +{
1542 +       spin_lock_init(&tp->tx_lock);
1543 +       init_waitqueue_head(&tp->wait_q);
1544 +       atomic_set(&tp->refcnt, 1);
1545 +}
1546 +
1547 +static void tpm_private_put(void)
1548 +{
1549 +       if (!atomic_dec_and_test(&my_priv->refcnt))
1550 +               return;
1551 +
1552 +       tpmif_free_tx_buffers(my_priv);
1553 +       kfree(my_priv);
1554 +       my_priv = NULL;
1555 +}
1556 +
1557 +static struct tpm_private *tpm_private_get(void)
1558 +{
1559 +       int err;
1560 +
1561 +       if (my_priv) {
1562 +               atomic_inc(&my_priv->refcnt);
1563 +               return my_priv;
1564 +       }
1565 +
1566 +       my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
1567 +       if (!my_priv)
1568 +               return NULL;
1569 +
1570 +       tpm_private_init(my_priv);
1571 +       err = tpmif_allocate_tx_buffers(my_priv);
1572 +       if (err < 0)
1573 +               tpm_private_put();
1574 +
1575 +       return my_priv;
1576 +}
1577 +
1578 +/**************************************************************
1579 +
1580 + The interface to let the tpm plugin register its callback
1581 + function and send data to another partition using this module
1582 +
1583 +**************************************************************/
1584 +
1585 +static DEFINE_MUTEX(suspend_lock);
1586 +/*
1587 + * Send data via this module by calling this function
1588 + */
1589 +int vtpm_vd_send(struct tpm_private *tp,
1590 +                 const u8 * buf, size_t count, void *ptr)
1591 +{
1592 +       int sent;
1593 +
1594 +       mutex_lock(&suspend_lock);
1595 +       sent = tpm_xmit(tp, buf, count, 0, ptr);
1596 +       mutex_unlock(&suspend_lock);
1597 +
1598 +       return sent;
1599 +}
1600 +
1601 +/**************************************************************
1602 + XENBUS support code
1603 +**************************************************************/
1604 +
1605 +static int setup_tpmring(struct xenbus_device *dev,
1606 +                         struct tpm_private *tp)
1607 +{
1608 +       tpmif_tx_interface_t *sring;
1609 +       int err;
1610 +
1611 +       tp->ring_ref = GRANT_INVALID_REF;
1612 +
1613 +       sring = (void *)__get_free_page(GFP_KERNEL);
1614 +       if (!sring) {
1615 +               xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
1616 +               return -ENOMEM;
1617 +       }
1618 +       tp->tx = sring;
1619 +
1620 +       err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx));
1621 +       if (err < 0) {
1622 +               free_page((unsigned long)sring);
1623 +               tp->tx = NULL;
1624 +               xenbus_dev_fatal(dev, err, "allocating grant reference");
1625 +               goto fail;
1626 +       }
1627 +       tp->ring_ref = err;
1628 +
1629 +       err = tpmif_connect(dev, tp, dev->otherend_id);
1630 +       if (err)
1631 +               goto fail;
1632 +
1633 +       return 0;
1634 +fail:
1635 +       destroy_tpmring(tp);
1636 +       return err;
1637 +}
1638 +
1639 +
1640 +static void destroy_tpmring(struct tpm_private *tp)
1641 +{
1642 +       tpmif_set_connected_state(tp, 0);
1643 +
1644 +       if (tp->ring_ref != GRANT_INVALID_REF) {
1645 +               gnttab_end_foreign_access(tp->ring_ref, (unsigned long)tp->tx);
1646 +               tp->ring_ref = GRANT_INVALID_REF;
1647 +               tp->tx = NULL;
1648 +       }
1649 +
1650 +       if (tp->irq)
1651 +               unbind_from_irqhandler(tp->irq, tp);
1652 +
1653 +       tp->irq = 0;
1654 +}
1655 +
1656 +
1657 +static int talk_to_backend(struct xenbus_device *dev,
1658 +                           struct tpm_private *tp)
1659 +{
1660 +       const char *message = NULL;
1661 +       int err;
1662 +       struct xenbus_transaction xbt;
1663 +
1664 +       err = setup_tpmring(dev, tp);
1665 +       if (err) {
1666 +               xenbus_dev_fatal(dev, err, "setting up ring");
1667 +               goto out;
1668 +       }
1669 +
1670 +again:
1671 +       err = xenbus_transaction_start(&xbt);
1672 +       if (err) {
1673 +               xenbus_dev_fatal(dev, err, "starting transaction");
1674 +               goto destroy_tpmring;
1675 +       }
1676 +
1677 +       err = xenbus_printf(xbt, dev->nodename,
1678 +                           "ring-ref","%u", tp->ring_ref);
1679 +       if (err) {
1680 +               message = "writing ring-ref";
1681 +               goto abort_transaction;
1682 +       }
1683 +
1684 +       err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
1685 +                           irq_to_evtchn_port(tp->irq));
1686 +       if (err) {
1687 +               message = "writing event-channel";
1688 +               goto abort_transaction;
1689 +       }
1690 +
1691 +       err = xenbus_transaction_end(xbt, 0);
1692 +       if (err == -EAGAIN)
1693 +               goto again;
1694 +       if (err) {
1695 +               xenbus_dev_fatal(dev, err, "completing transaction");
1696 +               goto destroy_tpmring;
1697 +       }
1698 +
1699 +       xenbus_switch_state(dev, XenbusStateConnected);
1700 +
1701 +       return 0;
1702 +
1703 +abort_transaction:
1704 +       xenbus_transaction_end(xbt, 1);
1705 +       if (message)
1706 +               xenbus_dev_error(dev, err, "%s", message);
1707 +destroy_tpmring:
1708 +       destroy_tpmring(tp);
1709 +out:
1710 +       return err;
1711 +}
1712 +
1713 +/**
1714 + * Callback received when the backend's state changes.
1715 + */
1716 +static void backend_changed(struct xenbus_device *dev,
1717 +                           enum xenbus_state backend_state)
1718 +{
1719 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1720 +       DPRINTK("\n");
1721 +
1722 +       switch (backend_state) {
1723 +       case XenbusStateInitialising:
1724 +       case XenbusStateInitWait:
1725 +       case XenbusStateInitialised:
1726 +       case XenbusStateReconfiguring:
1727 +       case XenbusStateReconfigured:
1728 +       case XenbusStateUnknown:
1729 +               break;
1730 +
1731 +       case XenbusStateConnected:
1732 +               tpmif_set_connected_state(tp, 1);
1733 +               break;
1734 +
1735 +       case XenbusStateClosing:
1736 +               tpmif_set_connected_state(tp, 0);
1737 +               xenbus_frontend_closed(dev);
1738 +               break;
1739 +
1740 +       case XenbusStateClosed:
1741 +               tpmif_set_connected_state(tp, 0);
1742 +               if (tp->is_suspended == 0)
1743 +                       device_unregister(&dev->dev);
1744 +               xenbus_frontend_closed(dev);
1745 +               break;
1746 +       }
1747 +}
1748 +
1749 +static int tpmfront_probe(struct xenbus_device *dev,
1750 +                          const struct xenbus_device_id *id)
1751 +{
1752 +       int err;
1753 +       int handle;
1754 +       struct tpm_private *tp = tpm_private_get();
1755 +
1756 +       if (!tp)
1757 +               return -ENOMEM;
1758 +
1759 +       tp->chip = init_vtpm(&dev->dev, tp);
1760 +       if (IS_ERR(tp->chip))
1761 +               return PTR_ERR(tp->chip);
1762 +
1763 +       err = xenbus_scanf(XBT_NIL, dev->nodename,
1764 +                          "handle", "%i", &handle);
1765 +       if (XENBUS_EXIST_ERR(err))
1766 +               return err;
1767 +
1768 +       if (err < 0) {
1769 +               xenbus_dev_fatal(dev, err, "reading handle");
1770 +               return err;
1771 +       }
1772 +
1773 +       tp->dev = dev;
1774 +
1775 +       err = talk_to_backend(dev, tp);
1776 +       if (err) {
1777 +               tpm_private_put();
1778 +               return err;
1779 +       }
1780 +
1781 +       return 0;
1782 +}
1783 +
1784 +
1785 +static int tpmfront_remove(struct xenbus_device *dev)
1786 +{
1787 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1788 +       destroy_tpmring(tp);
1789 +       cleanup_vtpm(&dev->dev);
1790 +       return 0;
1791 +}
1792 +
1793 +static int tpmfront_suspend(struct xenbus_device *dev)
1794 +{
1795 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1796 +       u32 ctr;
1797 +
1798 +       /* Take the lock, preventing any application from sending. */
1799 +       mutex_lock(&suspend_lock);
1800 +       tp->is_suspended = 1;
1801 +
1802 +       for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) {
1803 +               if ((ctr % 10) == 0)
1804 +                       printk("TPM-FE [INFO]: Waiting for outstanding "
1805 +                              "request.\n");
1806 +               /* Wait for a request to be responded to. */
1807 +               interruptible_sleep_on_timeout(&tp->wait_q, 100);
1808 +       }
1809 +
1810 +       return 0;
1811 +}
1812 +
1813 +static int tpmfront_suspend_finish(struct tpm_private *tp)
1814 +{
1815 +       tp->is_suspended = 0;
1816 +       /* Allow applications to send again. */
1817 +       mutex_unlock(&suspend_lock);
1818 +       return 0;
1819 +}
1820 +
1821 +static int tpmfront_suspend_cancel(struct xenbus_device *dev)
1822 +{
1823 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1824 +       return tpmfront_suspend_finish(tp);
1825 +}
1826 +
1827 +static int tpmfront_resume(struct xenbus_device *dev)
1828 +{
1829 +       struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1830 +       destroy_tpmring(tp);
1831 +       return talk_to_backend(dev, tp);
1832 +}
1833 +
1834 +static int tpmif_connect(struct xenbus_device *dev,
1835 +                         struct tpm_private *tp,
1836 +                         domid_t domid)
1837 +{
1838 +       int err;
1839 +
1840 +       tp->backend_id = domid;
1841 +
1842 +       err = bind_listening_port_to_irqhandler(
1843 +               domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp);
1844 +       if (err <= 0) {
1845 +               WPRINTK("bind_listening_port_to_irqhandler failed "
1846 +                       "(err=%d)\n", err);
1847 +               return err;
1848 +       }
1849 +       tp->irq = err;
1850 +
1851 +       return 0;
1852 +}
1853 +
1854 +static struct xenbus_device_id tpmfront_ids[] = {
1855 +       { "vtpm" },
1856 +       { "" }
1857 +};
1858 +
1859 +static struct xenbus_driver tpmfront = {
1860 +       .name = "vtpm",
1861 +       .owner = THIS_MODULE,
1862 +       .ids = tpmfront_ids,
1863 +       .probe = tpmfront_probe,
1864 +       .remove =  tpmfront_remove,
1865 +       .resume = tpmfront_resume,
1866 +       .otherend_changed = backend_changed,
1867 +       .suspend = tpmfront_suspend,
1868 +       .suspend_cancel = tpmfront_suspend_cancel,
1869 +};
1870 +
1871 +static void __init init_tpm_xenbus(void)
1872 +{
1873 +       xenbus_register_frontend(&tpmfront);
1874 +}
1875 +
1876 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp)
1877 +{
1878 +       unsigned int i;
1879 +
1880 +       for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
1881 +               tp->tx_buffers[i] = tx_buffer_alloc();
1882 +               if (!tp->tx_buffers[i]) {
1883 +                       tpmif_free_tx_buffers(tp);
1884 +                       return -ENOMEM;
1885 +               }
1886 +       }
1887 +       return 0;
1888 +}
1889 +
1890 +static void tpmif_free_tx_buffers(struct tpm_private *tp)
1891 +{
1892 +       unsigned int i;
1893 +
1894 +       for (i = 0; i < TPMIF_TX_RING_SIZE; i++)
1895 +               tx_buffer_free(tp->tx_buffers[i]);
1896 +}
1897 +
1898 +static void tpmif_rx_action(unsigned long priv)
1899 +{
1900 +       struct tpm_private *tp = (struct tpm_private *)priv;
1901 +       int i = 0;
1902 +       unsigned int received;
1903 +       unsigned int offset = 0;
1904 +       u8 *buffer;
1905 +       tpmif_tx_request_t *tx = &tp->tx->ring[i].req;
1906 +
1907 +       atomic_set(&tp->tx_busy, 0);
1908 +       wake_up_interruptible(&tp->wait_q);
1909 +
1910 +       received = tx->size;
1911 +
1912 +       buffer = kmalloc(received, GFP_ATOMIC);
1913 +       if (!buffer)
1914 +               return;
1915 +
1916 +       for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) {
1917 +               struct tx_buffer *txb = tp->tx_buffers[i];
1918 +               tpmif_tx_request_t *tx;
1919 +               unsigned int tocopy;
1920 +
1921 +               tx = &tp->tx->ring[i].req;
1922 +               tocopy = tx->size;
1923 +               if (tocopy > PAGE_SIZE)
1924 +                       tocopy = PAGE_SIZE;
1925 +
1926 +               memcpy(&buffer[offset], txb->data, tocopy);
1927 +
1928 +               gnttab_release_grant_reference(&gref_head, tx->ref);
1929 +
1930 +               offset += tocopy;
1931 +       }
1932 +
1933 +       vtpm_vd_recv(tp->chip, buffer, received, tp->tx_remember);
1934 +       kfree(buffer);
1935 +}
1936 +
1937 +
1938 +static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
1939 +{
1940 +       struct tpm_private *tp = tpm_priv;
1941 +       unsigned long flags;
1942 +
1943 +       spin_lock_irqsave(&tp->tx_lock, flags);
1944 +       tpmif_rx_tasklet.data = (unsigned long)tp;
1945 +       tasklet_schedule(&tpmif_rx_tasklet);
1946 +       spin_unlock_irqrestore(&tp->tx_lock, flags);
1947 +
1948 +       return IRQ_HANDLED;
1949 +}
1950 +
1951 +
1952 +static int tpm_xmit(struct tpm_private *tp,
1953 +                    const u8 * buf, size_t count, int isuserbuffer,
1954 +                    void *remember)
1955 +{
1956 +       tpmif_tx_request_t *tx;
1957 +       TPMIF_RING_IDX i;
1958 +       unsigned int offset = 0;
1959 +
1960 +       spin_lock_irq(&tp->tx_lock);
1961 +
1962 +       if (unlikely(atomic_read(&tp->tx_busy))) {
1963 +               printk("tpm_xmit: There's an outstanding request/response "
1964 +                      "on the way!\n");
1965 +               spin_unlock_irq(&tp->tx_lock);
1966 +               return -EBUSY;
1967 +       }
1968 +
1969 +       if (tp->is_connected != 1) {
1970 +               spin_unlock_irq(&tp->tx_lock);
1971 +               return -EIO;
1972 +       }
1973 +
1974 +       for (i = 0; count > 0 && i < TPMIF_TX_RING_SIZE; i++) {
1975 +               struct tx_buffer *txb = tp->tx_buffers[i];
1976 +               int copied;
1977 +
1978 +               if (!txb) {
1979 +                       DPRINTK("txb (i=%d) is NULL. buffers initialized?\n"
1980 +                               "Not transmitting anything!\n", i);
1981 +                       spin_unlock_irq(&tp->tx_lock);
1982 +                       return -EFAULT;
1983 +               }
1984 +
1985 +               copied = tx_buffer_copy(txb, &buf[offset], count,
1986 +                                       isuserbuffer);
1987 +               if (copied < 0) {
1988 +                       /* An error occurred */
1989 +                       spin_unlock_irq(&tp->tx_lock);
1990 +                       return copied;
1991 +               }
1992 +               count -= copied;
1993 +               offset += copied;
1994 +
1995 +               tx = &tp->tx->ring[i].req;
1996 +               tx->addr = virt_to_machine(txb->data);
1997 +               tx->size = txb->len;
1998 +               tx->unused = 0;
1999 +
2000 +               DPRINTK("First 4 characters sent by TPM-FE are "
2001 +                       "0x%02x 0x%02x 0x%02x 0x%02x\n",
2002 +                       txb->data[0],txb->data[1],txb->data[2],txb->data[3]);
2003 +
2004 +               /* Get the grant table reference for this page. */
2005 +               tx->ref = gnttab_claim_grant_reference(&gref_head);
2006 +               if (tx->ref == -ENOSPC) {
2007 +                       spin_unlock_irq(&tp->tx_lock);
2008 +                       DPRINTK("Grant table claim reference failed in "
2009 +                               "func:%s line:%d file:%s\n",
2010 +                               __FUNCTION__, __LINE__, __FILE__);
2011 +                       return -ENOSPC;
2012 +               }
2013 +               gnttab_grant_foreign_access_ref(tx->ref,
2014 +                                               tp->backend_id,
2015 +                                               virt_to_mfn(txb->data),
2016 +                                               0 /*RW*/);
2017 +               wmb();
2018 +       }
2019 +
2020 +       atomic_set(&tp->tx_busy, 1);
2021 +       tp->tx_remember = remember;
2022 +
2023 +       mb();
2024 +
2025 +       notify_remote_via_irq(tp->irq);
2026 +
2027 +       spin_unlock_irq(&tp->tx_lock);
2028 +       return offset;
2029 +}
2030 +
2031 +
2032 +static void tpmif_notify_upperlayer(struct tpm_private *tp)
2033 +{
2034 +       /* Notify upper layer about the state of the connection to the BE. */
2035 +       vtpm_vd_status(tp->chip, (tp->is_connected
2036 +                                 ? TPM_VD_STATUS_CONNECTED
2037 +                                 : TPM_VD_STATUS_DISCONNECTED));
2038 +}
2039 +
2040 +
2041 +static void tpmif_set_connected_state(struct tpm_private *tp, u8 is_connected)
2042 +{
2043 +       /*
2044 +        * Don't notify upper layer if we are in suspend mode and
2045 +        * should disconnect - the assumption is that we will resume.
2046 +        * The mutex keeps apps from sending.
2047 +        */
2048 +       if (is_connected == 0 && tp->is_suspended == 1)
2049 +               return;
2050 +
2051 +       /*
2052 +        * Unlock the mutex if we are connected again
2053 +        * after being suspended - now resuming.
2054 +        * This also removes the suspend state.
2055 +        */
2056 +       if (is_connected == 1 && tp->is_suspended == 1)
2057 +               tpmfront_suspend_finish(tp);
2058 +
2059 +       if (is_connected != tp->is_connected) {
2060 +               tp->is_connected = is_connected;
2061 +               tpmif_notify_upperlayer(tp);
2062 +       }
2063 +}
2064 +
2065 +
2066 +
2067 +/* =================================================================
2068 + * Initialization function.
2069 + * =================================================================
2070 + */
2071 +
2072 +
2073 +static int __init tpmif_init(void)
2074 +{
2075 +       struct tpm_private *tp;
2076 +
2077 +       if (is_initial_xendomain())
2078 +               return -EPERM;
2079 +
2080 +       tp = tpm_private_get();
2081 +       if (!tp)
2082 +               return -ENOMEM;
2083 +
2084 +       IPRINTK("Initialising the vTPM driver.\n");
2085 +       if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE,
2086 +                                         &gref_head) < 0) {
2087 +               tpm_private_put();
2088 +               return -EFAULT;
2089 +       }
2090 +
2091 +       init_tpm_xenbus();
2092 +       return 0;
2093 +}
2094 +
2095 +
2096 +module_init(tpmif_init);
2097 +
2098 +MODULE_LICENSE("Dual BSD/GPL");
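
The transmit path above (vtpm_vd_send() -> tpm_xmit()) splits a request across fixed, page-sized ring slots, one tx_buffer per slot, before granting the pages to the backend and notifying it over the event channel. Below is a minimal user-space sketch of just the slot-fill step; the sizes are placeholders standing in for PAGE_SIZE and TPMIF_TX_RING_SIZE, and all Xen-specific pieces (grant references, event channel, locking) are omitted.

/*
 * Simplified illustration (not the driver itself) of the tpm_xmit()
 * slot-fill loop: the request is chopped into page-sized pieces, one
 * per ring slot, and each slot records how many bytes it holds.
 * Anything beyond the ring capacity is simply not queued, as in the
 * driver.
 */
#include <stdio.h>
#include <string.h>

#define SLOT_SIZE  4096   /* placeholder for PAGE_SIZE */
#define RING_SLOTS 10     /* placeholder for TPMIF_TX_RING_SIZE */

struct slot {
	size_t len;
	unsigned char data[SLOT_SIZE];
};

/* Returns the number of bytes actually queued. */
static size_t queue_request(struct slot ring[RING_SLOTS],
			    const unsigned char *buf, size_t count)
{
	size_t offset = 0;
	int i;

	for (i = 0; count > 0 && i < RING_SLOTS; i++) {
		size_t chunk = count < SLOT_SIZE ? count : SLOT_SIZE;

		memcpy(ring[i].data, buf + offset, chunk);
		ring[i].len = chunk;
		offset += chunk;
		count -= chunk;
	}
	return offset;
}

int main(void)
{
	static struct slot ring[RING_SLOTS];
	static unsigned char req[12000];

	memset(req, 0xab, sizeof(req));
	printf("queued %zu of %zu bytes\n",
	       queue_request(ring, req, sizeof(req)), sizeof(req));
	return 0;
}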
2099 --- head-2010-08-30.orig/drivers/edac/edac_mc.c 2010-02-24 19:52:17.000000000 +0100
2100 +++ head-2010-08-30/drivers/edac/edac_mc.c      2010-06-22 10:37:35.000000000 +0200
2101 @@ -602,6 +602,10 @@ static void edac_mc_scrub_block(unsigned
2102  
2103         debugf3("%s()\n", __func__);
2104  
2105 +#ifdef CONFIG_XEN
2106 +       page = mfn_to_local_pfn(page);
2107 +#endif
2108 +
2109         /* ECC error page was not in our memory. Ignore it. */
2110         if (!pfn_valid(page))
2111                 return;
2112 --- head-2010-08-30.orig/drivers/firmware/dell_rbu.c    2010-08-02 00:11:14.000000000 +0200
2113 +++ head-2010-08-30/drivers/firmware/dell_rbu.c 2010-06-22 10:37:35.000000000 +0200
2114 @@ -170,9 +170,28 @@ static int create_packet(void *data, siz
2115                         spin_lock(&rbu_data.lock);
2116                         goto out_alloc_packet_array;
2117                 }
2118 +#ifdef CONFIG_XEN
2119 +               if (ordernum && xen_create_contiguous_region(
2120 +                       (unsigned long)packet_data_temp_buf, ordernum, 0)) {
2121 +                       free_pages((unsigned long)packet_data_temp_buf,
2122 +                                  ordernum);
2123 +                       printk(KERN_WARNING
2124 +                               "dell_rbu:%s: failed to adjust new "
2125 +                               "packet\n", __func__);
2126 +                       retval = -ENOMEM;
2127 +                       spin_lock(&rbu_data.lock);
2128 +                       goto out_alloc_packet_array;
2129 +               }
2130 +#endif
2131  
2132 -               if ((unsigned long)virt_to_phys(packet_data_temp_buf)
2133 +               if ((unsigned long)virt_to_bus(packet_data_temp_buf)
2134                                 < allocation_floor) {
2135 +#ifdef CONFIG_XEN
2136 +                       if (ordernum)
2137 +                               xen_destroy_contiguous_region(
2138 +                                       (unsigned long)packet_data_temp_buf,
2139 +                                       ordernum);
2140 +#endif
2141                         pr_debug("packet 0x%lx below floor at 0x%lx.\n",
2142                                         (unsigned long)virt_to_phys(
2143                                                 packet_data_temp_buf),
2144 @@ -186,7 +205,7 @@ static int create_packet(void *data, siz
2145         newpacket->data = packet_data_temp_buf;
2146  
2147         pr_debug("create_packet: newpacket at physical addr %lx\n",
2148 -               (unsigned long)virt_to_phys(newpacket->data));
2149 +               (unsigned long)virt_to_bus(newpacket->data));
2150  
2151         /* packets may not have fixed size */
2152         newpacket->length = length;
2153 @@ -205,7 +224,7 @@ out_alloc_packet_array:
2154         /* always free packet array */
2155         for (;idx>0;idx--) {
2156                 pr_debug("freeing unused packet below floor 0x%lx.\n",
2157 -                       (unsigned long)virt_to_phys(
2158 +                       (unsigned long)virt_to_bus(
2159                                 invalid_addr_packet_array[idx-1]));
2160                 free_pages((unsigned long)invalid_addr_packet_array[idx-1],
2161                         ordernum);
2162 @@ -349,6 +368,13 @@ static void packet_empty_list(void)
2163                  * to make sure there are no stale RBU packets left in memory
2164                  */
2165                 memset(newpacket->data, 0, rbu_data.packetsize);
2166 +#ifdef CONFIG_XEN
2167 +               if (newpacket->ordernum)
2168 +                       xen_destroy_contiguous_region(
2169 +                               (unsigned long)newpacket->data,
2170 +                               newpacket->ordernum);
2171 +#endif
2172 +
2173                 free_pages((unsigned long) newpacket->data,
2174                         newpacket->ordernum);
2175                 kfree(newpacket);
2176 @@ -403,7 +429,9 @@ static int img_update_realloc(unsigned l
2177  {
2178         unsigned char *image_update_buffer = NULL;
2179         unsigned long rc;
2180 +#ifndef CONFIG_XEN
2181         unsigned long img_buf_phys_addr;
2182 +#endif
2183         int ordernum;
2184         int dma_alloc = 0;
2185  
2186 @@ -434,15 +462,19 @@ static int img_update_realloc(unsigned l
2187  
2188         spin_unlock(&rbu_data.lock);
2189  
2190 +#ifndef CONFIG_XEN
2191         ordernum = get_order(size);
2192         image_update_buffer =
2193                 (unsigned char *) __get_free_pages(GFP_KERNEL, ordernum);
2194  
2195         img_buf_phys_addr =
2196 -               (unsigned long) virt_to_phys(image_update_buffer);
2197 +               (unsigned long) virt_to_bus(image_update_buffer);
2198  
2199         if (img_buf_phys_addr > BIOS_SCAN_LIMIT) {
2200                 free_pages((unsigned long) image_update_buffer, ordernum);
2201 +#else
2202 +       {
2203 +#endif
2204                 ordernum = -1;
2205                 image_update_buffer = dma_alloc_coherent(NULL, size,
2206                         &dell_rbu_dmaaddr, GFP_KERNEL);
2207 @@ -695,6 +727,12 @@ static struct bin_attribute rbu_packet_s
2208  static int __init dcdrbu_init(void)
2209  {
2210         int rc;
2211 +
2212 +#ifdef CONFIG_XEN
2213 +       if (!is_initial_xendomain())
2214 +               return -ENODEV;
2215 +#endif
2216 +
2217         spin_lock_init(&rbu_data.lock);
2218  
2219         init_packet_head();
2220 --- head-2010-08-30.orig/drivers/ide/ide-lib.c  2009-09-10 00:13:59.000000000 +0200
2221 +++ head-2010-08-30/drivers/ide/ide-lib.c       2010-06-22 10:37:35.000000000 +0200
2222 @@ -18,12 +18,12 @@ void ide_toggle_bounce(ide_drive_t *driv
2223  {
2224         u64 addr = BLK_BOUNCE_HIGH;     /* dma64_addr_t */
2225  
2226 -       if (!PCI_DMA_BUS_IS_PHYS) {
2227 -               addr = BLK_BOUNCE_ANY;
2228 -       } else if (on && drive->media == ide_disk) {
2229 +       if (on && drive->media == ide_disk) {
2230                 struct device *dev = drive->hwif->dev;
2231  
2232 -               if (dev && dev->dma_mask)
2233 +               if (!PCI_DMA_BUS_IS_PHYS)
2234 +                       addr = BLK_BOUNCE_ANY;
2235 +               else if (dev && dev->dma_mask)
2236                         addr = *dev->dma_mask;
2237         }
2238  
2239 --- head-2010-08-30.orig/drivers/oprofile/buffer_sync.c 2010-05-16 23:17:36.000000000 +0200
2240 +++ head-2010-08-30/drivers/oprofile/buffer_sync.c      2010-06-22 10:37:35.000000000 +0200
2241 @@ -8,6 +8,10 @@
2242   * @author Barry Kasindorf
2243   * @author Robert Richter <robert.richter@amd.com>
2244   *
2245 + * Modified by Aravind Menon for Xen
2246 + * These modifications are:
2247 + * Copyright (C) 2005 Hewlett-Packard Co.
2248 + *
2249   * This is the core of the buffer management. Each
2250   * CPU buffer is processed and entered into the
2251   * global event buffer. Such processing is necessary
2252 @@ -43,6 +47,8 @@ static cpumask_var_t marked_cpus;
2253  static DEFINE_SPINLOCK(task_mortuary);
2254  static void process_task_mortuary(void);
2255  
2256 +static int cpu_current_domain[NR_CPUS];
2257 +
2258  /* Take ownership of the task struct and place it on the
2259   * list for processing. Only after two full buffer syncs
2260   * does the task eventually get freed, because by then
2261 @@ -61,7 +67,6 @@ task_free_notify(struct notifier_block *
2262         return NOTIFY_OK;
2263  }
2264  
2265 -
2266  /* The task is on its way out. A sync of the buffer means we can catch
2267   * any remaining samples for this task.
2268   */
2269 @@ -154,6 +159,11 @@ static void end_sync(void)
2270  int sync_start(void)
2271  {
2272         int err;
2273 +       int i;
2274 +
2275 +       for (i = 0; i < NR_CPUS; i++) {
2276 +               cpu_current_domain[i] = COORDINATOR_DOMAIN;
2277 +       }
2278  
2279         if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
2280                 return -ENOMEM;
2281 @@ -285,13 +295,29 @@ static void add_cpu_switch(int i)
2282         last_cookie = INVALID_COOKIE;
2283  }
2284  
2285 -static void add_kernel_ctx_switch(unsigned int in_kernel)
2286 +static void add_cpu_mode_switch(unsigned int cpu_mode)
2287  {
2288         add_event_entry(ESCAPE_CODE);
2289 -       if (in_kernel)
2290 +       switch (cpu_mode) {
2291 +       case CPU_MODE_USER:
2292 +               add_event_entry(USER_ENTER_SWITCH_CODE);
2293 +               break;
2294 +       case CPU_MODE_KERNEL:
2295                 add_event_entry(KERNEL_ENTER_SWITCH_CODE);
2296 -       else
2297 -               add_event_entry(KERNEL_EXIT_SWITCH_CODE);
2298 +               break;
2299 +       case CPU_MODE_XEN:
2300 +               add_event_entry(XEN_ENTER_SWITCH_CODE);
2301 +               break;
2302 +       default:
2303 +               break;
2304 +       }
2305 +}
2306 +
2307 +static void add_domain_switch(unsigned long domain_id)
2308 +{
2309 +       add_event_entry(ESCAPE_CODE);
2310 +       add_event_entry(DOMAIN_SWITCH_CODE);
2311 +       add_event_entry(domain_id);
2312  }
2313  
2314  static void
2315 @@ -372,12 +398,12 @@ static inline void add_sample_entry(unsi
2316   * for later lookup from userspace. Return 0 on failure.
2317   */
2318  static int
2319 -add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
2320 +add_sample(struct mm_struct *mm, struct op_sample *s, int cpu_mode)
2321  {
2322         unsigned long cookie;
2323         off_t offset;
2324  
2325 -       if (in_kernel) {
2326 +       if (cpu_mode >= CPU_MODE_KERNEL) {
2327                 add_sample_entry(s->eip, s->event);
2328                 return 1;
2329         }
2330 @@ -502,9 +528,10 @@ void sync_buffer(int cpu)
2331         unsigned long val;
2332         struct task_struct *new;
2333         unsigned long cookie = 0;
2334 -       int in_kernel = 1;
2335 +       int cpu_mode = CPU_MODE_KERNEL;
2336         sync_buffer_state state = sb_buffer_start;
2337         unsigned int i;
2338 +       int domain_switch = 0;
2339         unsigned long available;
2340         unsigned long flags;
2341         struct op_entry entry;
2342 @@ -514,6 +541,11 @@ void sync_buffer(int cpu)
2343  
2344         add_cpu_switch(cpu);
2345  
2346 +       /* We need to assign the first samples in this CPU buffer to the
2347 +          same domain that we were processing at the last sync_buffer */
2348 +       if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN)
2349 +               add_domain_switch(cpu_current_domain[cpu]);
2350 +
2351         op_cpu_buffer_reset(cpu);
2352         available = op_cpu_buffer_entries(cpu);
2353  
2354 @@ -522,6 +554,13 @@ void sync_buffer(int cpu)
2355                 if (!sample)
2356                         break;
2357  
2358 +               if (domain_switch) {
2359 +                       cpu_current_domain[cpu] = sample->eip;
2360 +                       add_domain_switch(sample->eip);
2361 +                       domain_switch = 0;
2362 +                       continue;
2363 +               }
2364 +
2365                 if (is_code(sample->eip)) {
2366                         flags = sample->event;
2367                         if (flags & TRACE_BEGIN) {
2368 @@ -530,10 +569,10 @@ void sync_buffer(int cpu)
2369                         }
2370                         if (flags & KERNEL_CTX_SWITCH) {
2371                                 /* kernel/userspace switch */
2372 -                               in_kernel = flags & IS_KERNEL;
2373 +                               cpu_mode = flags & CPU_MODE_MASK;
2374                                 if (state == sb_buffer_start)
2375                                         state = sb_sample_start;
2376 -                               add_kernel_ctx_switch(flags & IS_KERNEL);
2377 +                               add_cpu_mode_switch(cpu_mode);
2378                         }
2379                         if (flags & USER_CTX_SWITCH
2380                             && op_cpu_buffer_get_data(&entry, &val)) {
2381 @@ -546,16 +585,23 @@ void sync_buffer(int cpu)
2382                                         cookie = get_exec_dcookie(mm);
2383                                 add_user_ctx_switch(new, cookie);
2384                         }
2385 +                       if (flags & DOMAIN_SWITCH)
2386 +                               domain_switch = 1;
2387                         if (op_cpu_buffer_get_size(&entry))
2388                                 add_data(&entry, mm);
2389                         continue;
2390                 }
2391  
2392 +               if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
2393 +                       add_sample_entry(sample->eip, sample->event);
2394 +                       continue;
2395 +               }
2396 +
2397                 if (state < sb_bt_start)
2398                         /* ignore sample */
2399                         continue;
2400  
2401 -               if (add_sample(mm, sample, in_kernel))
2402 +               if (add_sample(mm, sample, cpu_mode))
2403                         continue;
2404  
2405                 /* ignore backtraces if failed to add a sample */
2406 @@ -566,6 +612,10 @@ void sync_buffer(int cpu)
2407         }
2408         release_mm(mm);
2409  
2410 +       /* We reset domain to COORDINATOR at each CPU switch */
2411 +       if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN)
2412 +               add_domain_switch(COORDINATOR_DOMAIN);
2413 +
2414         mark_done(cpu);
2415  
2416         mutex_unlock(&buffer_mutex);
2417 --- head-2010-08-30.orig/drivers/oprofile/cpu_buffer.c  2010-08-02 00:11:14.000000000 +0200
2418 +++ head-2010-08-30/drivers/oprofile/cpu_buffer.c       2010-06-22 10:37:35.000000000 +0200
2419 @@ -8,6 +8,10 @@
2420   * @author Barry Kasindorf <barry.kasindorf@amd.com>
2421   * @author Robert Richter <robert.richter@amd.com>
2422   *
2423 + * Modified by Aravind Menon for Xen
2424 + * These modifications are:
2425 + * Copyright (C) 2005 Hewlett-Packard Co.
2426 + *
2427   * Each CPU has a local buffer that stores PC value/event
2428   * pairs. We also log context switches when we notice them.
2429   * Eventually each CPU's buffer is processed into the global
2430 @@ -38,6 +42,8 @@ static void wq_sync_buffer(struct work_s
2431  #define DEFAULT_TIMER_EXPIRE (HZ / 10)
2432  static int work_enabled;
2433  
2434 +static int32_t current_domain = COORDINATOR_DOMAIN;
2435 +
2436  unsigned long oprofile_get_cpu_buffer_size(void)
2437  {
2438         return oprofile_cpu_buffer_size;
2439 @@ -75,7 +81,7 @@ int alloc_cpu_buffers(void)
2440                 struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
2441  
2442                 b->last_task = NULL;
2443 -               b->last_is_kernel = -1;
2444 +               b->last_cpu_mode = -1;
2445                 b->tracing = 0;
2446                 b->buffer_size = buffer_size;
2447                 b->sample_received = 0;
2448 @@ -178,7 +184,7 @@ unsigned long op_cpu_buffer_entries(int 
2449  
2450  static int
2451  op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
2452 -           int is_kernel, struct task_struct *task)
2453 +           int cpu_mode, struct task_struct *task)
2454  {
2455         struct op_entry entry;
2456         struct op_sample *sample;
2457 @@ -191,16 +197,15 @@ op_add_code(struct oprofile_cpu_buffer *
2458                 flags |= TRACE_BEGIN;
2459  
2460         /* notice a switch from user->kernel or vice versa */
2461 -       is_kernel = !!is_kernel;
2462 -       if (cpu_buf->last_is_kernel != is_kernel) {
2463 -               cpu_buf->last_is_kernel = is_kernel;
2464 -               flags |= KERNEL_CTX_SWITCH;
2465 -               if (is_kernel)
2466 -                       flags |= IS_KERNEL;
2467 +       if (cpu_buf->last_cpu_mode != cpu_mode) {
2468 +               cpu_buf->last_cpu_mode = cpu_mode;
2469 +               flags |= KERNEL_CTX_SWITCH | cpu_mode;
2470         }
2471  
2472         /* notice a task switch */
2473 -       if (cpu_buf->last_task != task) {
2474 +       /* if not processing other domain samples */
2475 +       if (cpu_buf->last_task != task &&
2476 +           current_domain == COORDINATOR_DOMAIN) {
2477                 cpu_buf->last_task = task;
2478                 flags |= USER_CTX_SWITCH;
2479         }
2480 @@ -249,14 +254,14 @@ op_add_sample(struct oprofile_cpu_buffer
2481  /*
2482   * This must be safe from any context.
2483   *
2484 - * is_kernel is needed because on some architectures you cannot
2485 + * cpu_mode is needed because on some architectures you cannot
2486   * tell if you are in kernel or user space simply by looking at
2487 - * pc. We tag this in the buffer by generating kernel enter/exit
2488 - * events whenever is_kernel changes
2489 + * pc. We tag this in the buffer by generating kernel/user (and
2490 + * xen) enter events whenever cpu_mode changes
2491   */
2492  static int
2493  log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
2494 -          unsigned long backtrace, int is_kernel, unsigned long event)
2495 +          unsigned long backtrace, int cpu_mode, unsigned long event)
2496  {
2497         cpu_buf->sample_received++;
2498  
2499 @@ -265,7 +270,7 @@ log_sample(struct oprofile_cpu_buffer *c
2500                 return 0;
2501         }
2502  
2503 -       if (op_add_code(cpu_buf, backtrace, is_kernel, current))
2504 +       if (op_add_code(cpu_buf, backtrace, cpu_mode, current))
2505                 goto fail;
2506  
2507         if (op_add_sample(cpu_buf, pc, event))
2508 @@ -428,6 +433,25 @@ fail:
2509         return;
2510  }
2511  
2512 +int oprofile_add_domain_switch(int32_t domain_id)
2513 +{
2514 +       struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
2515 +
2516 +       /* should have space for switching into and out of domain
2517 +          (2 slots each) plus one sample and one cpu mode switch */
2518 +       if (((nr_available_slots(cpu_buf) < 6) &&
2519 +            (domain_id != COORDINATOR_DOMAIN)) ||
2520 +           (nr_available_slots(cpu_buf) < 2))
2521 +               return 0;
2522 +
2523 +       add_code(cpu_buf, DOMAIN_SWITCH);
2524 +       add_sample(cpu_buf, domain_id, 0);
2525 +
2526 +       current_domain = domain_id;
2527 +
2528 +       return 1;
2529 +}
2530 +
2531  /*
2532   * This serves to avoid cpu buffer overflow, and makes sure
2533   * the task mortuary progresses
2534 --- head-2010-08-30.orig/drivers/oprofile/cpu_buffer.h  2010-02-24 19:52:17.000000000 +0100
2535 +++ head-2010-08-30/drivers/oprofile/cpu_buffer.h       2010-06-22 10:37:35.000000000 +0200
2536 @@ -40,7 +40,7 @@ struct op_entry;
2537  struct oprofile_cpu_buffer {
2538         unsigned long buffer_size;
2539         struct task_struct *last_task;
2540 -       int last_is_kernel;
2541 +       int last_cpu_mode;
2542         int tracing;
2543         unsigned long sample_received;
2544         unsigned long sample_lost_overflow;
2545 @@ -62,7 +62,7 @@ static inline void op_cpu_buffer_reset(i
2546  {
2547         struct oprofile_cpu_buffer *cpu_buf = &per_cpu(op_cpu_buffer, cpu);
2548  
2549 -       cpu_buf->last_is_kernel = -1;
2550 +       cpu_buf->last_cpu_mode = -1;
2551         cpu_buf->last_task = NULL;
2552  }
2553  
2554 @@ -112,9 +112,13 @@ int op_cpu_buffer_get_data(struct op_ent
2555  }
2556  
2557  /* extra data flags */
2558 -#define KERNEL_CTX_SWITCH      (1UL << 0)
2559 -#define IS_KERNEL              (1UL << 1)
2560 +#define CPU_MODE_USER          0
2561 +#define CPU_MODE_KERNEL                1
2562 +#define CPU_MODE_XEN           2
2563 +#define CPU_MODE_MASK          3
2564  #define TRACE_BEGIN            (1UL << 2)
2565  #define USER_CTX_SWITCH                (1UL << 3)
2566 +#define KERNEL_CTX_SWITCH      (1UL << 4)
2567 +#define DOMAIN_SWITCH          (1UL << 5)
2568  
2569  #endif /* OPROFILE_CPU_BUFFER_H */
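
The cpu_buffer.h hunk above replaces the single IS_KERNEL bit with a two-bit CPU mode (user/kernel/Xen) carried in the low bits of the flags word, plus separate context-switch and domain-switch bits. A small self-contained sketch, reusing the exact constants from the hunk, of how op_add_code() packs the flags and how sync_buffer() unpacks them:

/*
 * Minimal sketch of the flag encoding from cpu_buffer.h above.  The
 * constant values are copied from the hunk; the demo main() is
 * illustrative only.
 */
#include <stdio.h>

#define CPU_MODE_USER          0
#define CPU_MODE_KERNEL        1
#define CPU_MODE_XEN           2
#define CPU_MODE_MASK          3
#define TRACE_BEGIN            (1UL << 2)
#define USER_CTX_SWITCH        (1UL << 3)
#define KERNEL_CTX_SWITCH      (1UL << 4)
#define DOMAIN_SWITCH          (1UL << 5)

int main(void)
{
	/* producer side: a mode change is flagged together with the new mode */
	unsigned long flags = KERNEL_CTX_SWITCH | CPU_MODE_XEN;

	/* consumer side: the low two bits carry the CPU mode */
	if (flags & KERNEL_CTX_SWITCH)
		printf("cpu mode switched to %lu\n", flags & CPU_MODE_MASK);
	if (flags & DOMAIN_SWITCH)
		printf("next sample carries a domain id\n");
	return 0;
}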
2570 --- head-2010-08-30.orig/drivers/oprofile/event_buffer.h        2008-12-25 00:26:37.000000000 +0100
2571 +++ head-2010-08-30/drivers/oprofile/event_buffer.h     2010-06-22 10:37:35.000000000 +0200
2572 @@ -30,6 +30,9 @@ void wake_up_buffer_waiter(void);
2573  #define INVALID_COOKIE ~0UL
2574  #define NO_COOKIE 0UL
2575  
2576 +/* Constant used to refer to coordinator domain (Xen) */
2577 +#define COORDINATOR_DOMAIN -1
2578 +
2579  extern const struct file_operations event_buffer_fops;
2580  
2581  /* mutex between sync_cpu_buffers() and the
2582 --- head-2010-08-30.orig/drivers/oprofile/oprof.c       2010-08-02 00:11:14.000000000 +0200
2583 +++ head-2010-08-30/drivers/oprofile/oprof.c    2010-06-22 10:37:35.000000000 +0200
2584 @@ -5,6 +5,10 @@
2585   * @remark Read the file COPYING
2586   *
2587   * @author John Levon <levon@movementarian.org>
2588 + *
2589 + * Modified by Aravind Menon for Xen
2590 + * These modifications are:
2591 + * Copyright (C) 2005 Hewlett-Packard Co.
2592   */
2593  
2594  #include <linux/kernel.h>
2595 @@ -35,6 +39,32 @@ static DEFINE_MUTEX(start_mutex);
2596   */
2597  static int timer = 0;
2598  
2599 +int oprofile_set_active(int active_domains[], unsigned int adomains)
2600 +{
2601 +       int err;
2602 +
2603 +       if (!oprofile_ops.set_active)
2604 +               return -EINVAL;
2605 +
2606 +       mutex_lock(&start_mutex);
2607 +       err = oprofile_ops.set_active(active_domains, adomains);
2608 +       mutex_unlock(&start_mutex);
2609 +       return err;
2610 +}
2611 +
2612 +int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
2613 +{
2614 +       int err;
2615 +
2616 +       if (!oprofile_ops.set_passive)
2617 +               return -EINVAL;
2618 +
2619 +       mutex_lock(&start_mutex);
2620 +       err = oprofile_ops.set_passive(passive_domains, pdomains);
2621 +       mutex_unlock(&start_mutex);
2622 +       return err;
2623 +}
2624 +
2625  int oprofile_setup(void)
2626  {
2627         int err;
2628 --- head-2010-08-30.orig/drivers/oprofile/oprof.h       2010-08-02 00:11:14.000000000 +0200
2629 +++ head-2010-08-30/drivers/oprofile/oprof.h    2010-06-22 10:37:35.000000000 +0200
2630 @@ -40,4 +40,7 @@ void oprofile_timer_exit(void);
2631  int oprofile_set_backtrace(unsigned long depth);
2632  int oprofile_set_timeout(unsigned long time);
2633  
2634 +int oprofile_set_active(int active_domains[], unsigned int adomains);
2635 +int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
2636 +
2637  #endif /* OPROF_H */
2638 --- head-2010-08-30.orig/drivers/oprofile/oprofile_files.c      2009-12-03 04:51:21.000000000 +0100
2639 +++ head-2010-08-30/drivers/oprofile/oprofile_files.c   2010-06-22 10:37:35.000000000 +0200
2640 @@ -5,11 +5,17 @@
2641   * @remark Read the file COPYING
2642   *
2643   * @author John Levon <levon@movementarian.org>
2644 + *
2645 + * Modified by Aravind Menon for Xen
2646 + * These modifications are:
2647 + * Copyright (C) 2005 Hewlett-Packard Co.
2648   */
2649  
2650  #include <linux/fs.h>
2651  #include <linux/oprofile.h>
2652  #include <linux/jiffies.h>
2653 +#include <asm/uaccess.h>
2654 +#include <linux/ctype.h>
2655  
2656  #include "event_buffer.h"
2657  #include "oprofile_stats.h"
2658 @@ -165,6 +171,195 @@ static const struct file_operations dump
2659         .write          = dump_write,
2660  };
2661  
2662 +#define TMPBUFSIZE 512
2663 +
2664 +static unsigned int adomains = 0;
2665 +static int active_domains[MAX_OPROF_DOMAINS + 1];
2666 +static DEFINE_MUTEX(adom_mutex);
2667 +
2668 +static ssize_t adomain_write(struct file * file, char const __user * buf,
2669 +                            size_t count, loff_t * offset)
2670 +{
2671 +       char *tmpbuf;
2672 +       char *startp, *endp;
2673 +       int i;
2674 +       unsigned long val;
2675 +       ssize_t retval = count;
2676 +
2677 +       if (*offset)
2678 +               return -EINVAL;
2679 +       if (count > TMPBUFSIZE - 1)
2680 +               return -EINVAL;
2681 +
2682 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2683 +               return -ENOMEM;
2684 +
2685 +       if (copy_from_user(tmpbuf, buf, count)) {
2686 +               kfree(tmpbuf);
2687 +               return -EFAULT;
2688 +       }
2689 +       tmpbuf[count] = 0;
2690 +
2691 +       mutex_lock(&adom_mutex);
2692 +
2693 +       startp = tmpbuf;
2694 +       /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2695 +       for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2696 +               val = simple_strtoul(startp, &endp, 0);
2697 +               if (endp == startp)
2698 +                       break;
2699 +               while (ispunct(*endp) || isspace(*endp))
2700 +                       endp++;
2701 +               active_domains[i] = val;
2702 +               if (active_domains[i] != val)
2703 +                       /* Overflow, force error below */
2704 +                       i = MAX_OPROF_DOMAINS + 1;
2705 +               startp = endp;
2706 +       }
2707 +       /* Force error on trailing junk */
2708 +       adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2709 +
2710 +       kfree(tmpbuf);
2711 +
2712 +       if (adomains > MAX_OPROF_DOMAINS
2713 +           || oprofile_set_active(active_domains, adomains)) {
2714 +               adomains = 0;
2715 +               retval = -EINVAL;
2716 +       }
2717 +
2718 +       mutex_unlock(&adom_mutex);
2719 +       return retval;
2720 +}
2721 +
2722 +static ssize_t adomain_read(struct file * file, char __user * buf,
2723 +                           size_t count, loff_t * offset)
2724 +{
2725 +       char * tmpbuf;
2726 +       size_t len;
2727 +       int i;
2728 +       ssize_t retval;
2729 +
2730 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2731 +               return -ENOMEM;
2732 +
2733 +       mutex_lock(&adom_mutex);
2734 +
2735 +       len = 0;
2736 +       for (i = 0; i < adomains; i++)
2737 +               len += snprintf(tmpbuf + len,
2738 +                               len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2739 +                               "%u ", active_domains[i]);
2740 +       WARN_ON(len > TMPBUFSIZE);
2741 +       if (len != 0 && len <= TMPBUFSIZE)
2742 +               tmpbuf[len-1] = '\n';
2743 +
2744 +       mutex_unlock(&adom_mutex);
2745 +
2746 +       retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2747 +
2748 +       kfree(tmpbuf);
2749 +       return retval;
2750 +}
2751 +
2752 +
2753 +static const struct file_operations active_domain_ops = {
2754 +       .read           = adomain_read,
2755 +       .write          = adomain_write,
2756 +};
2757 +
2758 +static unsigned int pdomains = 0;
2759 +static int passive_domains[MAX_OPROF_DOMAINS];
2760 +static DEFINE_MUTEX(pdom_mutex);
2761 +
2762 +static ssize_t pdomain_write(struct file * file, char const __user * buf,
2763 +                            size_t count, loff_t * offset)
2764 +{
2765 +       char *tmpbuf;
2766 +       char *startp, *endp;
2767 +       int i;
2768 +       unsigned long val;
2769 +       ssize_t retval = count;
2770 +
2771 +       if (*offset)
2772 +               return -EINVAL;
2773 +       if (count > TMPBUFSIZE - 1)
2774 +               return -EINVAL;
2775 +
2776 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2777 +               return -ENOMEM;
2778 +
2779 +       if (copy_from_user(tmpbuf, buf, count)) {
2780 +               kfree(tmpbuf);
2781 +               return -EFAULT;
2782 +       }
2783 +       tmpbuf[count] = 0;
2784 +
2785 +       mutex_lock(&pdom_mutex);
2786 +
2787 +       startp = tmpbuf;
2788 +       /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2789 +       for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2790 +               val = simple_strtoul(startp, &endp, 0);
2791 +               if (endp == startp)
2792 +                       break;
2793 +               while (ispunct(*endp) || isspace(*endp))
2794 +                       endp++;
2795 +               passive_domains[i] = val;
2796 +               if (passive_domains[i] != val)
2797 +                       /* Overflow, force error below */
2798 +                       i = MAX_OPROF_DOMAINS + 1;
2799 +               startp = endp;
2800 +       }
2801 +       /* Force error on trailing junk */
2802 +       pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2803 +
2804 +       kfree(tmpbuf);
2805 +
2806 +       if (pdomains > MAX_OPROF_DOMAINS
2807 +           || oprofile_set_passive(passive_domains, pdomains)) {
2808 +               pdomains = 0;
2809 +               retval = -EINVAL;
2810 +       }
2811 +
2812 +       mutex_unlock(&pdom_mutex);
2813 +       return retval;
2814 +}
2815 +
2816 +static ssize_t pdomain_read(struct file * file, char __user * buf,
2817 +                           size_t count, loff_t * offset)
2818 +{
2819 +       char * tmpbuf;
2820 +       size_t len;
2821 +       int i;
2822 +       ssize_t retval;
2823 +
2824 +       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2825 +               return -ENOMEM;
2826 +
2827 +       mutex_lock(&pdom_mutex);
2828 +
2829 +       len = 0;
2830 +       for (i = 0; i < pdomains; i++)
2831 +               len += snprintf(tmpbuf + len,
2832 +                               len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2833 +                               "%u ", passive_domains[i]);
2834 +       WARN_ON(len > TMPBUFSIZE);
2835 +       if (len != 0 && len <= TMPBUFSIZE)
2836 +               tmpbuf[len-1] = '\n';
2837 +
2838 +       mutex_unlock(&pdom_mutex);
2839 +
2840 +       retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2841 +
2842 +       kfree(tmpbuf);
2843 +       return retval;
2844 +}
2845 +
2846 +static const struct file_operations passive_domain_ops = {
2847 +       .read           = pdomain_read,
2848 +       .write          = pdomain_write,
2849 +};
2850 +
2851  void oprofile_create_files(struct super_block *sb, struct dentry *root)
2852  {
2853         /* reinitialize default values */
2854 @@ -175,6 +370,8 @@ void oprofile_create_files(struct super_
2855  
2856         oprofilefs_create_file(sb, root, "enable", &enable_fops);
2857         oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
2858 +       oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
2859 +       oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
2860         oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
2861         oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size);
2862         oprofilefs_create_ulong(sb, root, "buffer_watershed", &oprofile_buffer_watershed);
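
The new active_domains and passive_domains control files accept a list of domain IDs separated by whitespace or punctuation, parsed by adomain_write()/pdomain_write() above. A minimal usage sketch, assuming oprofilefs is mounted at /dev/oprofile (the conventional mount point, not guaranteed on every setup):

/*
 * Hypothetical usage sketch: write a domain list into the
 * active_domains control file added above.  The /dev/oprofile path is
 * an assumption; adjust it to wherever oprofilefs is mounted.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	const char *list = "0 1\n";
	int fd = open("/dev/oprofile/active_domains", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, list, strlen(list)) < 0)
		perror("write");
	close(fd);
	return 0;
}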
2863 --- head-2010-08-30.orig/fs/aio.c       2010-08-24 11:01:01.000000000 +0200
2864 +++ head-2010-08-30/fs/aio.c    2010-06-22 10:37:35.000000000 +0200
2865 @@ -41,6 +41,11 @@
2866  #include <asm/kmap_types.h>
2867  #include <asm/uaccess.h>
2868  
2869 +#ifdef CONFIG_EPOLL
2870 +#include <linux/poll.h>
2871 +#include <linux/eventpoll.h>
2872 +#endif
2873 +
2874  #if DEBUG > 1
2875  #define dprintk                printk
2876  #else
2877 @@ -998,6 +1003,11 @@ put_rq:
2878         if (waitqueue_active(&ctx->wait))
2879                 wake_up(&ctx->wait);
2880  
2881 +#ifdef CONFIG_EPOLL
2882 +       if (ctx->file && waitqueue_active(&ctx->poll_wait))
2883 +               wake_up(&ctx->poll_wait);
2884 +#endif
2885 +
2886         spin_unlock_irqrestore(&ctx->ctx_lock, flags);
2887         return ret;
2888  }
2889 @@ -1006,6 +1016,8 @@ EXPORT_SYMBOL(aio_complete);
2890  /* aio_read_evt
2891   *     Pull an event off of the ioctx's event ring.  Returns the number of 
2892   *     events fetched (0 or 1 ;-)
2893 + *     If ent parameter is 0, just returns the number of events that would
2894 + *     be fetched.
2895   *     FIXME: make this use cmpxchg.
2896   *     TODO: make the ringbuffer user mmap()able (requires FIXME).
2897   */
2898 @@ -1028,13 +1040,18 @@ static int aio_read_evt(struct kioctx *i
2899  
2900         head = ring->head % info->nr;
2901         if (head != ring->tail) {
2902 -               struct io_event *evp = aio_ring_event(info, head, KM_USER1);
2903 -               *ent = *evp;
2904 -               head = (head + 1) % info->nr;
2905 -               smp_mb(); /* finish reading the event before updatng the head */
2906 -               ring->head = head;
2907 -               ret = 1;
2908 -               put_aio_ring_event(evp, KM_USER1);
2909 +               if (ent) { /* event requested */
2910 +                       struct io_event *evp =
2911 +                               aio_ring_event(info, head, KM_USER1);
2912 +                       *ent = *evp;
2913 +                       head = (head + 1) % info->nr;
2914 +                       /* finish reading the event before updating the head */
2915 +                       smp_mb();
2916 +                       ring->head = head;
2917 +                       ret = 1;
2918 +                       put_aio_ring_event(evp, KM_USER1);
2919 +               } else /* only need to know availability */
2920 +                       ret = 1;
2921         }
2922         spin_unlock(&info->ring_lock);
2923  
2924 @@ -1219,6 +1236,13 @@ static void io_destroy(struct kioctx *io
2925  
2926         aio_cancel_all(ioctx);
2927         wait_for_all_aios(ioctx);
2928 +#ifdef CONFIG_EPOLL
2929 +       /* forget the poll file, but it's up to the user to close it */
2930 +       if (ioctx->file) {
2931 +               ioctx->file->private_data = 0;
2932 +               ioctx->file = 0;
2933 +       }
2934 +#endif
2935  
2936         /*
2937          * Wake up any waiters.  The setting of ctx->dead must be seen
2938 @@ -1229,6 +1253,67 @@ static void io_destroy(struct kioctx *io
2939         put_ioctx(ioctx);       /* once for the lookup */
2940  }
2941  
2942 +#ifdef CONFIG_EPOLL
2943 +
2944 +static int aio_queue_fd_close(struct inode *inode, struct file *file)
2945 +{
2946 +       struct kioctx *ioctx = file->private_data;
2947 +       if (ioctx) {
2948 +               file->private_data = 0;
2949 +               spin_lock_irq(&ioctx->ctx_lock);
2950 +               ioctx->file = 0;
2951 +               spin_unlock_irq(&ioctx->ctx_lock);
2952 +       }
2953 +       return 0;
2954 +}
2955 +
2956 +static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
2957 +{      unsigned int pollflags = 0;
2958 +       struct kioctx *ioctx = file->private_data;
2959 +
2960 +       if (ioctx) {
2961 +
2962 +               spin_lock_irq(&ioctx->ctx_lock);
2963 +               /* Insert inside our poll wait queue */
2964 +               poll_wait(file, &ioctx->poll_wait, wait);
2965 +
2966 +               /* Check our condition */
2967 +               if (aio_read_evt(ioctx, 0))
2968 +                       pollflags = POLLIN | POLLRDNORM;
2969 +               spin_unlock_irq(&ioctx->ctx_lock);
2970 +       }
2971 +
2972 +       return pollflags;
2973 +}
2974 +
2975 +static const struct file_operations aioq_fops = {
2976 +       .release        = aio_queue_fd_close,
2977 +       .poll           = aio_queue_fd_poll
2978 +};
2979 +
2980 +/* make_aio_fd:
2981 + *  Create a file descriptor that can be used to poll the event queue.
2982 + *  Based and piggybacked on the excellent epoll code.
2983 + */
2984 +
2985 +static int make_aio_fd(struct kioctx *ioctx)
2986 +{
2987 +       int error, fd;
2988 +       struct inode *inode;
2989 +       struct file *file;
2990 +
2991 +       error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
2992 +       if (error)
2993 +               return error;
2994 +
2995 +       /* associate the file with the IO context */
2996 +       file->private_data = ioctx;
2997 +       ioctx->file = file;
2998 +       init_waitqueue_head(&ioctx->poll_wait);
2999 +       return fd;
3000 +}
3001 +#endif
3002 +
3003  /* sys_io_setup:
3004   *     Create an aio_context capable of receiving at least nr_events.
3005   *     ctxp must not point to an aio_context that already exists, and
3006 @@ -1241,18 +1326,30 @@ static void io_destroy(struct kioctx *io
3007   *     resources are available.  May fail with -EFAULT if an invalid
3008   *     pointer is passed for ctxp.  Will fail with -ENOSYS if not
3009   *     implemented.
3010 + *
3011 + *     To request a selectable fd, the user context has to be initialized
3012 + *     to 1, instead of 0, and the return value is the fd.
3013 + *     This keeps the system call compatible, since a non-zero value
3014 + *     was not allowed so far.
3015   */
3016  SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
3017  {
3018         struct kioctx *ioctx = NULL;
3019         unsigned long ctx;
3020         long ret;
3021 +       int make_fd = 0;
3022  
3023         ret = get_user(ctx, ctxp);
3024         if (unlikely(ret))
3025                 goto out;
3026  
3027         ret = -EINVAL;
3028 +#ifdef CONFIG_EPOLL
3029 +       if (ctx == 1) {
3030 +               make_fd = 1;
3031 +               ctx = 0;
3032 +       }
3033 +#endif
3034         if (unlikely(ctx || nr_events == 0)) {
3035                 pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
3036                          ctx, nr_events);
3037 @@ -1263,8 +1360,12 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_e
3038         ret = PTR_ERR(ioctx);
3039         if (!IS_ERR(ioctx)) {
3040                 ret = put_user(ioctx->user_id, ctxp);
3041 -               if (!ret)
3042 -                       return 0;
3043 +#ifdef CONFIG_EPOLL
3044 +               if (make_fd && ret >= 0)
3045 +                       ret = make_aio_fd(ioctx);
3046 +#endif
3047 +               if (ret >= 0)
3048 +                       return ret;
3049  
3050                 get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
3051                 io_destroy(ioctx);
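
The io_setup() extension above is available only with CONFIG_EPOLL and this patch: initialising the context value to 1 instead of 0 asks the kernel for a file descriptor, returned as the positive syscall result, that can be polled for completed events, while the real context ID is still stored through ctxp. A hedged user-space sketch using the raw syscall (the glibc/libaio wrappers assume the stock semantics and cannot be used here); it only works on kernels carrying this patch.

/*
 * Sketch of the non-standard io_setup() extension: ctx == 1 on entry
 * requests a pollable fd for the event ring.  POLLIN becomes ready
 * once completed events sit in the ring (see aio_queue_fd_poll()
 * above).  Illustrative only.
 */
#include <stdio.h>
#include <poll.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>

int main(void)
{
	aio_context_t ctx = 1;          /* 1 => also hand back a pollable fd */
	long fd = syscall(__NR_io_setup, 128, &ctx);

	if (fd < 0) {
		perror("io_setup");
		return 1;
	}

	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	int n = poll(&pfd, 1, 0);

	printf("pollable aio fd %ld, %d event(s) pending\n", fd, n);

	/* the kernel forgets the poll file on destroy; closing it is ours */
	syscall(__NR_io_destroy, ctx);
	close(fd);
	return 0;
}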
3052 --- head-2010-08-30.orig/fs/compat_ioctl.c      2010-08-24 11:01:23.000000000 +0200
3053 +++ head-2010-08-30/fs/compat_ioctl.c   2010-08-24 11:38:05.000000000 +0200
3054 @@ -116,6 +116,13 @@
3055  #include <asm/fbio.h>
3056  #endif