- Update Xen patches to c/s 1011.
[opensuse:kernel-source.git] / patches.xen / xen3-patch-2.6.19
1 From: www.kernel.org
2 Subject: Linux 2.6.19
3 Patch-mainline: 2.6.19
4
5 Automatically created from "patches.kernel.org/patch-2.6.19" by xen-port-patches.py
6
7 Acked-by: jbeulich@novell.com
8
9 --- sle11sp1-2010-03-29.orig/arch/x86/Kconfig   2010-02-09 16:47:07.000000000 +0100
10 +++ sle11sp1-2010-03-29/arch/x86/Kconfig        2009-11-06 10:45:48.000000000 +0100
11 @@ -510,6 +510,7 @@ config SCHED_OMIT_FRAME_POINTER
12  
13  menuconfig PARAVIRT_GUEST
14         bool "Paravirtualized guest support"
15 +       depends on !XEN
16         ---help---
17           Say Y here to get to see options related to running Linux under
18           various hypervisors.  This option alone does not add any kernel code.
19 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/acpi/boot.c        2010-03-31 09:52:31.000000000 +0200
20 +++ sle11sp1-2010-03-29/arch/x86/kernel/acpi/boot.c     2010-03-22 12:18:02.000000000 +0100
21 @@ -69,8 +69,12 @@ int acpi_strict;
22  
23  u8 acpi_sci_flags __initdata;
24  int acpi_sci_override_gsi __initdata;
25 +#ifndef CONFIG_XEN
26  int acpi_skip_timer_override __initdata;
27  int acpi_use_timer_override __initdata;
28 +#else
29 +#define acpi_skip_timer_override 0
30 +#endif
31  
32  #ifdef CONFIG_X86_LOCAL_APIC
33  static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
34 @@ -490,6 +494,7 @@ int acpi_register_gsi(struct device *dev
35   */
36  #ifdef CONFIG_ACPI_HOTPLUG_CPU
37  
38 +#ifndef CONFIG_XEN
39  static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
40  {
41  #ifdef CONFIG_ACPI_NUMA
42 @@ -579,6 +584,9 @@ free_tmp_map:
43  out:
44         return retval;
45  }
46 +#else
47 +#define _acpi_map_lsapic(h, p) (-EINVAL)
48 +#endif
49  
50  /* wrapper to silence section mismatch warning */
51  int __ref acpi_map_lsapic(acpi_handle handle, int *pcpu)
52 @@ -589,9 +597,11 @@ EXPORT_SYMBOL(acpi_map_lsapic);
53  
54  int acpi_unmap_lsapic(int cpu)
55  {
56 +#ifndef CONFIG_XEN
57         per_cpu(x86_cpu_to_apicid, cpu) = -1;
58         set_cpu_present(cpu, false);
59         num_processors--;
60 +#endif
61  
62         return (0);
63  }
64 @@ -1753,7 +1763,7 @@ int __init acpi_mps_check(void)
65         return 0;
66  }
67  
68 -#ifdef CONFIG_X86_IO_APIC
69 +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
70  static int __init parse_acpi_skip_timer_override(char *arg)
71  {
72         acpi_skip_timer_override = 1;
73 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/apic_32-xen.c      2007-06-12 13:12:48.000000000 +0200
74 +++ sle11sp1-2010-03-29/arch/x86/kernel/apic_32-xen.c   2009-11-06 10:45:48.000000000 +0100
75 @@ -54,7 +54,6 @@ static cpumask_t timer_bcast_ipi;
76  /*
77   * Knob to control our willingness to enable the local APIC.
78   */
79 -int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
80  
81  /*
82   * Debug level
83 @@ -102,7 +101,7 @@ int get_physical_broadcast(void)
84  
85  #ifndef CONFIG_XEN
86  #ifndef CONFIG_SMP
87 -static void up_apic_timer_interrupt_call(struct pt_regs *regs)
88 +static void up_apic_timer_interrupt_call(void)
89  {
90         int cpu = smp_processor_id();
91  
92 @@ -111,11 +110,11 @@ static void up_apic_timer_interrupt_call
93          */
94         per_cpu(irq_stat, cpu).apic_timer_irqs++;
95  
96 -       smp_local_timer_interrupt(regs);
97 +       smp_local_timer_interrupt();
98  }
99  #endif
100  
101 -void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
102 +void smp_send_timer_broadcast_ipi(void)
103  {
104         cpumask_t mask;
105  
106 @@ -128,7 +127,7 @@ void smp_send_timer_broadcast_ipi(struct
107                  * We can directly call the apic timer interrupt handler
108                  * in UP case. Minus all irq related functions
109                  */
110 -               up_apic_timer_interrupt_call(regs);
111 +               up_apic_timer_interrupt_call();
112  #endif
113         }
114  }
115 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/cpu/common-xen.c   2009-05-19 09:16:41.000000000 +0200
116 +++ sle11sp1-2010-03-29/arch/x86/kernel/cpu/common-xen.c        2009-11-06 10:45:48.000000000 +0100
117 @@ -43,7 +43,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM
118  
119  extern int disable_pse;
120  
121 -static void default_init(struct cpuinfo_x86 * c)
122 +static void __cpuinit default_init(struct cpuinfo_x86 * c)
123  {
124         /* Not much we can do here... */
125         /* Check if at least it has cpuid */
126 @@ -56,7 +56,7 @@ static void default_init(struct cpuinfo_
127         }
128  }
129  
130 -static struct cpu_dev default_cpu = {
131 +static struct cpu_dev __cpuinitdata default_cpu = {
132         .c_init = default_init,
133         .c_vendor = "Unknown",
134  };
135 @@ -191,7 +191,16 @@ static void __cpuinit get_cpu_vendor(str
136  
137  static int __init x86_fxsr_setup(char * s)
138  {
139 +       /* Tell all the other CPUs to not use it... */
140         disable_x86_fxsr = 1;
141 +
142 +       /*
143 +        * ... and clear the bits early in the boot_cpu_data
144 +        * so that the bootup process doesn't try to do this
145 +        * either.
146 +        */
147 +       clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
148 +       clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
149         return 1;
150  }
151  __setup("nofxsr", x86_fxsr_setup);
152 @@ -272,7 +281,7 @@ static void __init early_cpu_detect(void
153         }
154  }
155  
156 -void __cpuinit generic_identify(struct cpuinfo_x86 * c)
157 +static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
158  {
159         u32 tfms, xlvl;
160         int ebx;
161 @@ -700,8 +709,7 @@ old_gdt:
162          */
163         atomic_inc(&init_mm.mm_count);
164         current->active_mm = &init_mm;
165 -       if (current->mm)
166 -               BUG();
167 +       BUG_ON(current->mm);
168         enter_lazy_tlb(&init_mm, current);
169  
170         load_esp0(t, thread);
171 @@ -714,7 +722,7 @@ old_gdt:
172  #endif
173  
174         /* Clear %fs and %gs. */
175 -       asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
176 +       asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
177  
178         /* Clear all 6 debug registers: */
179         set_debugreg(0, 0);
180 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/cpu/mcheck/Makefile        2010-01-27 14:28:25.000000000 +0100
181 +++ sle11sp1-2010-03-29/arch/x86/kernel/cpu/mcheck/Makefile     2010-01-27 14:29:48.000000000 +0100
182 @@ -9,3 +9,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += thres
183  obj-$(CONFIG_X86_MCE_INJECT)   += mce-inject.o
184  
185  obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
186 +
187 +disabled-obj-$(CONFIG_XEN)     := therm_throt.o
188 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/cpu/mcheck/mce_dom0.c      2009-10-01 11:00:47.000000000 +0200
189 +++ sle11sp1-2010-03-29/arch/x86/kernel/cpu/mcheck/mce_dom0.c   2009-11-06 10:45:48.000000000 +0100
190 @@ -53,8 +53,7 @@ static struct mc_info *g_mi;
191  
192  /*dom0 mce virq handler, logging physical mce error info*/
193  
194 -static irqreturn_t mce_dom0_interrupt(int irq, void *dev_id,
195 -                                                                       struct pt_regs *regs)
196 +static irqreturn_t mce_dom0_interrupt(int irq, void *dev_id)
197  {
198         xen_mc_t mc_op;
199         int result = 0;
200 @@ -129,6 +128,6 @@ void bind_virq_for_mce(void)
201                 printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
202  
203         /* Log the machine checks left over from the previous reset. */
204 -       mce_dom0_interrupt(VIRQ_MCA, NULL, NULL);
205 +       mce_dom0_interrupt(VIRQ_MCA, NULL);
206  }
207  
208 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/entry_32-xen.S     2009-05-19 09:16:41.000000000 +0200
209 +++ sle11sp1-2010-03-29/arch/x86/kernel/entry_32-xen.S  2009-11-06 10:45:48.000000000 +0100
210 @@ -80,8 +80,12 @@ VM_MASK              = 0x00020000
211  NMI_MASK       = 0x80000000
212  
213  #ifndef CONFIG_XEN
214 -#define DISABLE_INTERRUPTS     cli
215 -#define ENABLE_INTERRUPTS      sti
216 +/* These are replaced for paravirtualization */
217 +#define DISABLE_INTERRUPTS             cli
218 +#define ENABLE_INTERRUPTS              sti
219 +#define ENABLE_INTERRUPTS_SYSEXIT      sti; sysexit
220 +#define INTERRUPT_RETURN               iret
221 +#define GET_CR0_INTO_EAX               movl %cr0, %eax
222  #else
223  /* Offsets into shared_info_t. */
224  #define evtchn_upcall_pending          /* 0 */
225 @@ -99,15 +103,29 @@ NMI_MASK   = 0x80000000
226  
227  #define __DISABLE_INTERRUPTS   movb $1,evtchn_upcall_mask(%esi)
228  #define __ENABLE_INTERRUPTS    movb $0,evtchn_upcall_mask(%esi)
229 +#define __TEST_PENDING         testb $0xFF,evtchn_upcall_pending(%esi)
230  #define DISABLE_INTERRUPTS     GET_VCPU_INFO                           ; \
231                                 __DISABLE_INTERRUPTS
232  #define ENABLE_INTERRUPTS      GET_VCPU_INFO                           ; \
233                                 __ENABLE_INTERRUPTS
234 -#define __TEST_PENDING         testb $0xFF,evtchn_upcall_pending(%esi)
235 +#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS                  ; \
236 +sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/            ; \
237 +       __TEST_PENDING                                                  ; \
238 +       jnz  14f                # process more events if necessary...   ; \
239 +       movl ESI(%esp), %esi                                            ; \
240 +       sysexit                                                         ; \
241 +14:    __DISABLE_INTERRUPTS                                            ; \
242 +       TRACE_IRQS_OFF                                                  ; \
243 +sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/              ; \
244 +       push %esp                                                       ; \
245 +       call evtchn_do_upcall                                           ; \
246 +       add  $4,%esp                                                    ; \
247 +       jmp  ret_from_intr
248 +#define INTERRUPT_RETURN       iret
249  #endif
250  
251  #ifdef CONFIG_PREEMPT
252 -#define preempt_stop           cli; TRACE_IRQS_OFF
253 +#define preempt_stop           DISABLE_INTERRUPTS; TRACE_IRQS_OFF
254  #else
255  #define preempt_stop
256  #define resume_kernel          restore_nocheck
257 @@ -206,18 +224,21 @@ NMI_MASK  = 0x80000000
258  
259  #define RING0_INT_FRAME \
260         CFI_STARTPROC simple;\
261 +       CFI_SIGNAL_FRAME;\
262         CFI_DEF_CFA esp, 3*4;\
263         /*CFI_OFFSET cs, -2*4;*/\
264         CFI_OFFSET eip, -3*4
265  
266  #define RING0_EC_FRAME \
267         CFI_STARTPROC simple;\
268 +       CFI_SIGNAL_FRAME;\
269         CFI_DEF_CFA esp, 4*4;\
270         /*CFI_OFFSET cs, -2*4;*/\
271         CFI_OFFSET eip, -3*4
272  
273  #define RING0_PTREGS_FRAME \
274         CFI_STARTPROC simple;\
275 +       CFI_SIGNAL_FRAME;\
276         CFI_DEF_CFA esp, OLDESP-EBX;\
277         /*CFI_OFFSET cs, CS-OLDESP;*/\
278         CFI_OFFSET eip, EIP-OLDESP;\
279 @@ -263,8 +284,9 @@ ret_from_intr:
280  check_userspace:
281         movl EFLAGS(%esp), %eax         # mix EFLAGS and CS
282         movb CS(%esp), %al
283 -       testl $(VM_MASK | 2), %eax
284 -       jz resume_kernel
285 +       andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
286 +       cmpl $USER_RPL, %eax
287 +       jb resume_kernel                # not returning to v8086 or userspace
288  ENTRY(resume_userspace)
289         DISABLE_INTERRUPTS              # make sure we don't miss an interrupt
290                                         # setting need_resched or sigpending
291 @@ -277,7 +299,7 @@ ENTRY(resume_userspace)
292  
293  #ifdef CONFIG_PREEMPT
294  ENTRY(resume_kernel)
295 -       cli
296 +       DISABLE_INTERRUPTS
297         cmpl $0,TI_preempt_count(%ebp)  # non-zero preempt_count ?
298         jnz restore_nocheck
299  need_resched:
300 @@ -297,6 +319,7 @@ need_resched:
301         # sysenter call handler stub
302  ENTRY(sysenter_entry)
303         CFI_STARTPROC simple
304 +       CFI_SIGNAL_FRAME
305         CFI_DEF_CFA esp, 0
306         CFI_REGISTER esp, ebp
307         movl SYSENTER_stack_esp0(%esp),%esp
308 @@ -305,7 +328,7 @@ sysenter_past_esp:
309          * No need to follow this irqs on/off section: the syscall
310          * disabled irqs and here we enable it straight after entry:
311          */
312 -       sti
313 +       ENABLE_INTERRUPTS
314         pushl $(__USER_DS)
315         CFI_ADJUST_CFA_OFFSET 4
316         /*CFI_REL_OFFSET ss, 0*/
317 @@ -359,26 +382,8 @@ sysenter_past_esp:
318         movl EIP(%esp), %edx
319         movl OLDESP(%esp), %ecx
320         xorl %ebp,%ebp
321 -#ifdef CONFIG_XEN
322         TRACE_IRQS_ON
323 -       __ENABLE_INTERRUPTS
324 -sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
325 -       __TEST_PENDING
326 -       jnz  14f                        # process more events if necessary...
327 -       movl ESI(%esp), %esi
328 -       sysexit
329 -14:    __DISABLE_INTERRUPTS
330 -       TRACE_IRQS_OFF
331 -sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
332 -       push %esp
333 -       call evtchn_do_upcall
334 -       add  $4,%esp
335 -       jmp  ret_from_intr
336 -#else
337 -       TRACE_IRQS_ON
338 -       sti
339 -       sysexit
340 -#endif /* !CONFIG_XEN */
341 +       ENABLE_INTERRUPTS_SYSEXIT
342         CFI_ENDPROC
343  
344         # pv sysenter call handler stub
345 @@ -444,8 +449,8 @@ restore_all:
346         # See comments in process.c:copy_thread() for details.
347         movb OLDSS(%esp), %ah
348         movb CS(%esp), %al
349 -       andl $(VM_MASK | (4 << 8) | 3), %eax
350 -       cmpl $((4 << 8) | 3), %eax
351 +       andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
352 +       cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
353         CFI_REMEMBER_STATE
354         je ldt_ss                       # returning to user-space with LDT SS
355  restore_nocheck:
356 @@ -467,12 +472,11 @@ restore_nocheck_notrace:
357         RESTORE_REGS
358         addl $4, %esp
359         CFI_ADJUST_CFA_OFFSET -4
360 -1:     iret
361 +1:     INTERRUPT_RETURN
362  .section .fixup,"ax"
363  iret_exc:
364  #ifndef CONFIG_XEN
365 -       TRACE_IRQS_ON
366 -       sti
367 +       ENABLE_INTERRUPTS
368  #endif
369         pushl $0                        # no error code
370         pushl $do_iret_error
371 @@ -498,7 +502,7 @@ ldt_ss:
372          * dosemu and wine happy. */
373         subl $8, %esp           # reserve space for switch16 pointer
374         CFI_ADJUST_CFA_OFFSET 8
375 -       cli
376 +       DISABLE_INTERRUPTS
377         TRACE_IRQS_OFF
378         movl %esp, %eax
379         /* Set up the 16bit stack frame with switch32 pointer on top,
380 @@ -508,7 +512,7 @@ ldt_ss:
381         TRACE_IRQS_IRET
382         RESTORE_REGS
383         lss 20+4(%esp), %esp    # switch to 16bit stack
384 -1:     iret
385 +1:     INTERRUPT_RETURN
386  .section __ex_table,"a"
387         .align 4
388         .long 1b,iret_exc
389 @@ -524,7 +528,7 @@ scrit:      /**** START OF CRITICAL REGION **
390         RESTORE_REGS
391         addl $4, %esp
392         CFI_ADJUST_CFA_OFFSET -4
393 -1:     iret
394 +1:     INTERRUPT_RETURN
395  .section __ex_table,"a"
396         .align 4
397         .long 1b,iret_exc
398 @@ -713,11 +717,9 @@ ENTRY(name)                                \
399  #define UNWIND_ESPFIX_STACK
400  #endif
401  
402 -ENTRY(divide_error)
403 -       RING0_INT_FRAME
404 -       pushl $0                        # no error code
405 -       CFI_ADJUST_CFA_OFFSET 4
406 -       pushl $do_divide_error
407 +KPROBE_ENTRY(page_fault)
408 +       RING0_EC_FRAME
409 +       pushl $do_page_fault
410         CFI_ADJUST_CFA_OFFSET 4
411         ALIGN
412  error_code:
413 @@ -767,6 +769,7 @@ error_code:
414         call *%edi
415         jmp ret_from_exception
416         CFI_ENDPROC
417 +KPROBE_END(page_fault)
418  
419  #ifdef CONFIG_XEN
420  # A note on the "critical region" in our callback handler.
421 @@ -790,9 +793,11 @@ ENTRY(hypervisor_callback)
422         pushl %eax
423         CFI_ADJUST_CFA_OFFSET 4
424         SAVE_ALL
425 -       testb $2,CS(%esp)
426 +       movl CS(%esp),%ecx
427         movl EIP(%esp),%eax
428 -       jnz  .Ldo_upcall
429 +       andl $SEGMENT_RPL_MASK,%ecx
430 +       cmpl $USER_RPL,%ecx
431 +       jae  .Ldo_upcall
432         cmpl $scrit,%eax
433         jb   0f
434         cmpl $ecrit,%eax
435 @@ -928,7 +933,7 @@ ENTRY(device_not_available)
436         CFI_ADJUST_CFA_OFFSET 4
437         SAVE_ALL
438  #ifndef CONFIG_XEN
439 -       movl %cr0, %eax
440 +       GET_CR0_INTO_EAX
441         testl $0x4, %eax                # EM (math emulation bit)
442         je device_available_emulate
443         pushl $0                        # temporary storage for ORIG_EIP
444 @@ -963,9 +968,15 @@ device_available_emulate:
445         jne ok;                                 \
446  label:                                         \
447         movl SYSENTER_stack_esp0+offset(%esp),%esp;     \
448 +       CFI_DEF_CFA esp, 0;                     \
449 +       CFI_UNDEFINED eip;                      \
450         pushfl;                                 \
451 +       CFI_ADJUST_CFA_OFFSET 4;                \
452         pushl $__KERNEL_CS;                     \
453 -       pushl $sysenter_past_esp
454 +       CFI_ADJUST_CFA_OFFSET 4;                \
455 +       pushl $sysenter_past_esp;               \
456 +       CFI_ADJUST_CFA_OFFSET 4;                \
457 +       CFI_REL_OFFSET eip, 0
458  #endif /* CONFIG_XEN */
459  
460  KPROBE_ENTRY(debug)
461 @@ -984,7 +995,8 @@ debug_stack_correct:
462         call do_debug
463         jmp ret_from_exception
464         CFI_ENDPROC
465 -       .previous .text
466 +KPROBE_END(debug)
467 +
468  #ifndef CONFIG_XEN
469  /*
470   * NMI is doubly nasty. It can happen _while_ we're handling
471 @@ -994,7 +1006,7 @@ debug_stack_correct:
472   * check whether we got an NMI on the debug path where the debug
473   * fault happened on the sysenter path.
474   */
475 -ENTRY(nmi)
476 +KPROBE_ENTRY(nmi)
477         RING0_INT_FRAME
478         pushl %eax
479         CFI_ADJUST_CFA_OFFSET 4
480 @@ -1019,6 +1031,7 @@ ENTRY(nmi)
481         cmpl $sysenter_entry,12(%esp)
482         je nmi_debug_stack_check
483  nmi_stack_correct:
484 +       /* We have a RING0_INT_FRAME here */
485         pushl %eax
486         CFI_ADJUST_CFA_OFFSET 4
487         SAVE_ALL
488 @@ -1029,9 +1042,12 @@ nmi_stack_correct:
489         CFI_ENDPROC
490  
491  nmi_stack_fixup:
492 +       RING0_INT_FRAME
493         FIX_STACK(12,nmi_stack_correct, 1)
494         jmp nmi_stack_correct
495 +
496  nmi_debug_stack_check:
497 +       /* We have a RING0_INT_FRAME here */
498         cmpw $__KERNEL_CS,16(%esp)
499         jne nmi_stack_correct
500         cmpl $debug,(%esp)
501 @@ -1042,8 +1058,10 @@ nmi_debug_stack_check:
502         jmp nmi_stack_correct
503  
504  nmi_16bit_stack:
505 -       RING0_INT_FRAME
506 -       /* create the pointer to lss back */
507 +       /* We have a RING0_INT_FRAME here.
508 +        *
509 +        * create the pointer to lss back
510 +        */
511         pushl %ss
512         CFI_ADJUST_CFA_OFFSET 4
513         pushl %esp
514 @@ -1064,14 +1082,14 @@ nmi_16bit_stack:
515         call do_nmi
516         RESTORE_REGS
517         lss 12+4(%esp), %esp            # back to 16bit stack
518 -1:     iret
519 +1:     INTERRUPT_RETURN
520         CFI_ENDPROC
521  .section __ex_table,"a"
522         .align 4
523         .long 1b,iret_exc
524  .previous
525  #else
526 -ENTRY(nmi)
527 +KPROBE_ENTRY(nmi)
528         RING0_INT_FRAME
529         pushl %eax
530         CFI_ADJUST_CFA_OFFSET 4
531 @@ -1083,6 +1101,7 @@ ENTRY(nmi)
532         jmp restore_all
533         CFI_ENDPROC
534  #endif
535 +KPROBE_END(nmi)
536  
537  KPROBE_ENTRY(int3)
538         RING0_INT_FRAME
539 @@ -1094,7 +1113,7 @@ KPROBE_ENTRY(int3)
540         call do_int3
541         jmp ret_from_exception
542         CFI_ENDPROC
543 -       .previous .text
544 +KPROBE_END(int3)
545  
546  ENTRY(overflow)
547         RING0_INT_FRAME
548 @@ -1159,7 +1178,7 @@ KPROBE_ENTRY(general_protection)
549         CFI_ADJUST_CFA_OFFSET 4
550         jmp error_code
551         CFI_ENDPROC
552 -       .previous .text
553 +KPROBE_END(general_protection)
554  
555  ENTRY(alignment_check)
556         RING0_EC_FRAME
557 @@ -1168,13 +1187,14 @@ ENTRY(alignment_check)
558         jmp error_code
559         CFI_ENDPROC
560  
561 -KPROBE_ENTRY(page_fault)
562 -       RING0_EC_FRAME
563 -       pushl $do_page_fault
564 +ENTRY(divide_error)
565 +       RING0_INT_FRAME
566 +       pushl $0                        # no error code
567 +       CFI_ADJUST_CFA_OFFSET 4
568 +       pushl $do_divide_error
569         CFI_ADJUST_CFA_OFFSET 4
570         jmp error_code
571         CFI_ENDPROC
572 -       .previous .text
573  
574  #ifdef CONFIG_X86_MCE
575  ENTRY(machine_check)
576 @@ -1236,6 +1256,19 @@ ENTRY(fixup_4gb_segment)
577         jmp error_code
578         CFI_ENDPROC
579  
580 +ENTRY(kernel_thread_helper)
581 +       pushl $0                # fake return address for unwinder
582 +       CFI_STARTPROC
583 +       movl %edx,%eax
584 +       push %edx
585 +       CFI_ADJUST_CFA_OFFSET 4
586 +       call *%ebx
587 +       push %eax
588 +       CFI_ADJUST_CFA_OFFSET 4
589 +       call do_exit
590 +       CFI_ENDPROC
591 +ENDPROC(kernel_thread_helper)
592 +
593  .section .rodata,"a"
594  #include "syscall_table.S"
595  
596 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/head_32-xen.S      2007-06-12 13:12:48.000000000 +0200
597 +++ sle11sp1-2010-03-29/arch/x86/kernel/head_32-xen.S   2009-11-06 10:45:48.000000000 +0100
598 @@ -62,7 +62,7 @@ ENTRY(startup_32)
599         movl %eax,%gs
600         cld                     # gcc2 wants the direction flag cleared at all times
601  
602 -       pushl %eax              # fake return address
603 +       pushl $0                # fake return address for unwinder
604         jmp start_kernel
605  
606  #define HYPERCALL_PAGE_OFFSET 0x1000
607 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/io_apic_32-xen.c   2009-03-18 10:39:31.000000000 +0100
608 +++ sle11sp1-2010-03-29/arch/x86/kernel/io_apic_32-xen.c        2009-11-06 10:45:48.000000000 +0100
609 @@ -31,6 +31,9 @@
610  #include <linux/acpi.h>
611  #include <linux/module.h>
612  #include <linux/sysdev.h>
613 +#include <linux/pci.h>
614 +#include <linux/msi.h>
615 +#include <linux/htirq.h>
616  
617  #include <asm/io.h>
618  #include <asm/smp.h>
619 @@ -38,13 +41,15 @@
620  #include <asm/timer.h>
621  #include <asm/i8259.h>
622  #include <asm/nmi.h>
623 +#include <asm/msidef.h>
624 +#include <asm/hypertransport.h>
625  
626  #include <mach_apic.h>
627 +#include <mach_apicdef.h>
628  
629  #include "io_ports.h"
630  
631  #ifdef CONFIG_XEN
632 -
633  #include <xen/interface/xen.h>
634  #include <xen/interface/physdev.h>
635  #include <xen/evtchn.h>
636 @@ -56,32 +61,7 @@
637  
638  unsigned long io_apic_irqs;
639  
640 -static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
641 -{
642 -       struct physdev_apic apic_op;
643 -       int ret;
644 -
645 -       apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
646 -       apic_op.reg = reg;
647 -       ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
648 -       if (ret)
649 -               return ret;
650 -       return apic_op.value;
651 -}
652 -
653 -static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
654 -{
655 -       struct physdev_apic apic_op;
656 -
657 -       apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
658 -       apic_op.reg = reg;
659 -       apic_op.value = value;
660 -       WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
661 -}
662 -
663 -#define io_apic_read(a,r)    xen_io_apic_read(a,r)
664 -#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
665 -
666 +#define clear_IO_APIC() ((void)0)
667  #endif /* CONFIG_XEN */
668  
669  int (*ioapic_renumber_irq)(int ioapic, int irq);
670 @@ -108,7 +88,7 @@ int sis_apic_bug = -1;
671   */
672  int nr_ioapic_registers[MAX_IO_APICS];
673  
674 -int disable_timer_pin_1 __initdata;
675 +static int disable_timer_pin_1 __initdata;
676  
677  /*
678   * Rough estimation of how many shared IRQs there are, can
679 @@ -128,12 +108,124 @@ static struct irq_pin_list {
680         int apic, pin, next;
681  } irq_2_pin[PIN_MAP_SIZE];
682  
683 -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
684 -#ifdef CONFIG_PCI_MSI
685 -#define vector_to_irq(vector)  \
686 -       (platform_legacy_irq(vector) ? vector : vector_irq[vector])
687 +#ifndef CONFIG_XEN
688 +struct io_apic {
689 +       unsigned int index;
690 +       unsigned int unused[3];
691 +       unsigned int data;
692 +};
693 +
694 +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
695 +{
696 +       return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
697 +               + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
698 +}
699 +#endif
700 +
701 +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
702 +{
703 +#ifndef CONFIG_XEN
704 +       struct io_apic __iomem *io_apic = io_apic_base(apic);
705 +       writel(reg, &io_apic->index);
706 +       return readl(&io_apic->data);
707 +#else
708 +       struct physdev_apic apic_op;
709 +       int ret;
710 +
711 +       apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
712 +       apic_op.reg = reg;
713 +       ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
714 +       if (ret)
715 +               return ret;
716 +       return apic_op.value;
717 +#endif
718 +}
719 +
720 +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
721 +{
722 +#ifndef CONFIG_XEN
723 +       struct io_apic __iomem *io_apic = io_apic_base(apic);
724 +       writel(reg, &io_apic->index);
725 +       writel(value, &io_apic->data);
726 +#else
727 +       struct physdev_apic apic_op;
728 +
729 +       apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
730 +       apic_op.reg = reg;
731 +       apic_op.value = value;
732 +       WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
733 +#endif
734 +}
735 +
736 +#ifndef CONFIG_XEN
737 +/*
738 + * Re-write a value: to be used for read-modify-write
739 + * cycles where the read already set up the index register.
740 + *
741 + * Older SiS APIC requires we rewrite the index register
742 + */
743 +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
744 +{
745 +       volatile struct io_apic *io_apic = io_apic_base(apic);
746 +       if (sis_apic_bug)
747 +               writel(reg, &io_apic->index);
748 +       writel(value, &io_apic->data);
749 +}
750  #else
751 -#define vector_to_irq(vector)  (vector)
752 +#define io_apic_modify io_apic_write
753 +#endif
754 +
755 +union entry_union {
756 +       struct { u32 w1, w2; };
757 +       struct IO_APIC_route_entry entry;
758 +};
759 +
760 +#ifndef CONFIG_XEN
761 +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
762 +{
763 +       union entry_union eu;
764 +       unsigned long flags;
765 +       spin_lock_irqsave(&ioapic_lock, flags);
766 +       eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
767 +       eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
768 +       spin_unlock_irqrestore(&ioapic_lock, flags);
769 +       return eu.entry;
770 +}
771 +#endif
772 +
773 +/*
774 + * When we write a new IO APIC routing entry, we need to write the high
775 + * word first! If the mask bit in the low word is clear, we will enable
776 + * the interrupt, and we need to make sure the entry is fully populated
777 + * before that happens.
778 + */
779 +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
780 +{
781 +       unsigned long flags;
782 +       union entry_union eu;
783 +       eu.entry = e;
784 +       spin_lock_irqsave(&ioapic_lock, flags);
785 +       io_apic_write(apic, 0x11 + 2*pin, eu.w2);
786 +       io_apic_write(apic, 0x10 + 2*pin, eu.w1);
787 +       spin_unlock_irqrestore(&ioapic_lock, flags);
788 +}
789 +
790 +#ifndef CONFIG_XEN
791 +/*
792 + * When we mask an IO APIC routing entry, we need to write the low
793 + * word first, in order to set the mask bit before we change the
794 + * high bits!
795 + */
796 +static void ioapic_mask_entry(int apic, int pin)
797 +{
798 +       unsigned long flags;
799 +       union entry_union eu = { .entry.mask = 1 };
800 +
801 +       spin_lock_irqsave(&ioapic_lock, flags);
802 +       io_apic_write(apic, 0x10 + 2*pin, eu.w1);
803 +       io_apic_write(apic, 0x11 + 2*pin, eu.w2);
804 +       spin_unlock_irqrestore(&ioapic_lock, flags);
805 +}
806  #endif
807  
808  /*
809 @@ -159,9 +251,7 @@ static void add_pin_to_irq(unsigned int 
810         entry->pin = pin;
811  }
812  
813 -#ifdef CONFIG_XEN
814 -#define clear_IO_APIC() ((void)0)
815 -#else
816 +#ifndef CONFIG_XEN
817  /*
818   * Reroute an IRQ to a different pin.
819   */
820 @@ -246,25 +336,16 @@ static void unmask_IO_APIC_irq (unsigned
821  static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
822  {
823         struct IO_APIC_route_entry entry;
824 -       unsigned long flags;
825         
826         /* Check delivery_mode to be sure we're not clearing an SMI pin */
827 -       spin_lock_irqsave(&ioapic_lock, flags);
828 -       *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
829 -       *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
830 -       spin_unlock_irqrestore(&ioapic_lock, flags);
831 +       entry = ioapic_read_entry(apic, pin);
832         if (entry.delivery_mode == dest_SMI)
833                 return;
834  
835         /*
836          * Disable it in the IO-APIC irq-routing table:
837          */
838 -       memset(&entry, 0, sizeof(entry));
839 -       entry.mask = 1;
840 -       spin_lock_irqsave(&ioapic_lock, flags);
841 -       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
842 -       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
843 -       spin_unlock_irqrestore(&ioapic_lock, flags);
844 +       ioapic_mask_entry(apic, pin);
845  }
846  
847  static void clear_IO_APIC (void)
848 @@ -304,7 +385,7 @@ static void set_ioapic_affinity_irq(unsi
849                         break;
850                 entry = irq_2_pin + entry->next;
851         }
852 -       set_irq_info(irq, cpumask);
853 +       set_native_irq_info(irq, cpumask);
854         spin_unlock_irqrestore(&ioapic_lock, flags);
855  }
856  
857 @@ -1212,43 +1293,43 @@ static inline int IO_APIC_irq_trigger(in
858  /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
859  u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
860  
861 -int assign_irq_vector(int irq)
862 +static int __assign_irq_vector(int irq)
863  {
864 -       unsigned long flags;
865         int vector;
866         struct physdev_irq irq_op;
867  
868 -       BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
869 +       BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
870  
871         if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
872                 return -EINVAL;
873  
874 -       spin_lock_irqsave(&vector_lock, flags);
875 -
876 -       if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
877 -               spin_unlock_irqrestore(&vector_lock, flags);
878 -               return IO_APIC_VECTOR(irq);
879 -       }
880 +       if (irq_vector[irq] > 0)
881 +               return irq_vector[irq];
882  
883         irq_op.irq = irq;
884 -       if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
885 -               spin_unlock_irqrestore(&vector_lock, flags);
886 +       if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
887                 return -ENOSPC;
888 -       }
889  
890         vector = irq_op.vector;
891 -       vector_irq[vector] = irq;
892 -       if (irq != AUTO_ASSIGN)
893 -               IO_APIC_VECTOR(irq) = vector;
894 +       irq_vector[irq] = vector;
895 +
896 +       return vector;
897 +}
898 +
899 +static int assign_irq_vector(int irq)
900 +{
901 +       unsigned long flags;
902 +       int vector;
903  
904 +       spin_lock_irqsave(&vector_lock, flags);
905 +       vector = __assign_irq_vector(irq);
906         spin_unlock_irqrestore(&vector_lock, flags);
907  
908         return vector;
909  }
910  
911  #ifndef CONFIG_XEN
912 -static struct hw_interrupt_type ioapic_level_type;
913 -static struct hw_interrupt_type ioapic_edge_type;
914 +static struct irq_chip ioapic_chip;
915  
916  #define IOAPIC_AUTO    -1
917  #define IOAPIC_EDGE    0
918 @@ -1256,16 +1337,16 @@ static struct hw_interrupt_type ioapic_e
919  
920  static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
921  {
922 -       unsigned idx;
923 -
924 -       idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
925 -
926         if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
927                         trigger == IOAPIC_LEVEL)
928 -               irq_desc[idx].chip = &ioapic_level_type;
929 -       else
930 -               irq_desc[idx].chip = &ioapic_edge_type;
931 -       set_intr_gate(vector, interrupt[idx]);
932 +               set_irq_chip_and_handler_name(irq, &ioapic_chip,
933 +                                        handle_fasteoi_irq, "fasteoi");
934 +       else {
935 +               irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
936 +               set_irq_chip_and_handler_name(irq, &ioapic_chip,
937 +                                        handle_edge_irq, "edge");
938 +       }
939 +       set_intr_gate(vector, interrupt[irq]);
940  }
941  #else
942  #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
943 @@ -1336,9 +1417,8 @@ static void __init setup_IO_APIC_irqs(vo
944                         if (!apic && (irq < 16))
945                                 disable_8259A_irq(irq);
946                 }
947 +               ioapic_write_entry(apic, pin, entry);
948                 spin_lock_irqsave(&ioapic_lock, flags);
949 -               io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
950 -               io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
951                 set_native_irq_info(irq, TARGET_CPUS);
952                 spin_unlock_irqrestore(&ioapic_lock, flags);
953         }
954 @@ -1355,7 +1435,6 @@ static void __init setup_IO_APIC_irqs(vo
955  static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
956  {
957         struct IO_APIC_route_entry entry;
958 -       unsigned long flags;
959  
960         memset(&entry,0,sizeof(entry));
961  
962 @@ -1380,15 +1459,13 @@ static void __init setup_ExtINT_IRQ0_pin
963          * The timer IRQ doesn't have to know that behind the
964          * scene we have a 8259A-master in AEOI mode ...
965          */
966 -       irq_desc[0].chip = &ioapic_edge_type;
967 +       irq_desc[0].chip = &ioapic_chip;
968 +       set_irq_handler(0, handle_edge_irq);
969  
970         /*
971          * Add it to the IO-APIC irq-routing table:
972          */
973 -       spin_lock_irqsave(&ioapic_lock, flags);
974 -       io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
975 -       io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
976 -       spin_unlock_irqrestore(&ioapic_lock, flags);
977 +       ioapic_write_entry(apic, pin, entry);
978  
979         enable_8259A_irq(0);
980  }
981 @@ -1498,10 +1575,7 @@ void __init print_IO_APIC(void)
982         for (i = 0; i <= reg_01.bits.entries; i++) {
983                 struct IO_APIC_route_entry entry;
984  
985 -               spin_lock_irqsave(&ioapic_lock, flags);
986 -               *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
987 -               *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
988 -               spin_unlock_irqrestore(&ioapic_lock, flags);
989 +               entry = ioapic_read_entry(apic, i);
990  
991                 printk(KERN_DEBUG " %02x %03X %02X  ",
992                         i,
993 @@ -1521,17 +1595,12 @@ void __init print_IO_APIC(void)
994                 );
995         }
996         }
997 -       if (use_pci_vector())
998 -               printk(KERN_INFO "Using vector-based indexing\n");
999         printk(KERN_DEBUG "IRQ to pin mappings:\n");
1000         for (i = 0; i < NR_IRQS; i++) {
1001                 struct irq_pin_list *entry = irq_2_pin + i;
1002                 if (entry->pin < 0)
1003                         continue;
1004 -               if (use_pci_vector() && !platform_legacy_irq(i))
1005 -                       printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
1006 -               else
1007 -                       printk(KERN_DEBUG "IRQ%d ", i);
1008 +               printk(KERN_DEBUG "IRQ%d ", i);
1009                 for (;;) {
1010                         printk("-> %d:%d", entry->apic, entry->pin);
1011                         if (!entry->next)
1012 @@ -1720,10 +1789,7 @@ static void __init enable_IO_APIC(void)
1013                 /* See if any of the pins is in ExtINT mode */
1014                 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1015                         struct IO_APIC_route_entry entry;
1016 -                       spin_lock_irqsave(&ioapic_lock, flags);
1017 -                       *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1018 -                       *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1019 -                       spin_unlock_irqrestore(&ioapic_lock, flags);
1020 +                       entry = ioapic_read_entry(apic, pin);
1021  
1022  
1023                         /* If the interrupt line is enabled and in ExtInt mode
1024 @@ -1782,7 +1848,6 @@ void disable_IO_APIC(void)
1025          */
1026         if (ioapic_i8259.pin != -1) {
1027                 struct IO_APIC_route_entry entry;
1028 -               unsigned long flags;
1029  
1030                 memset(&entry, 0, sizeof(entry));
1031                 entry.mask            = 0; /* Enabled */
1032 @@ -1799,12 +1864,7 @@ void disable_IO_APIC(void)
1033                 /*
1034                  * Add it to the IO-APIC irq-routing table:
1035                  */
1036 -               spin_lock_irqsave(&ioapic_lock, flags);
1037 -               io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
1038 -                       *(((int *)&entry)+1));
1039 -               io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
1040 -                       *(((int *)&entry)+0));
1041 -               spin_unlock_irqrestore(&ioapic_lock, flags);
1042 +               ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1043         }
1044         disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1045  #endif
1046 @@ -1971,6 +2031,8 @@ static int __init timer_irq_works(void)
1047   */
1048  
1049  /*
1050 + * Startup quirk:
1051 + *
1052   * Starting up a edge-triggered IO-APIC interrupt is
1053   * nasty - we need to make sure that we get the edge.
1054   * If it is already asserted for some reason, we need
1055 @@ -1978,8 +2040,10 @@ static int __init timer_irq_works(void)
1056   *
1057   * This is not complete - we should be able to fake
1058   * an edge even if it isn't on the 8259A...
1059 + *
1060 + * (We do this for level-triggered IRQs too - it cannot hurt.)
1061   */
1062 -static unsigned int startup_edge_ioapic_irq(unsigned int irq)
1063 +static unsigned int startup_ioapic_irq(unsigned int irq)
1064  {
1065         int was_pending = 0;
1066         unsigned long flags;
1067 @@ -1996,47 +2060,18 @@ static unsigned int startup_edge_ioapic_
1068         return was_pending;
1069  }
1070  
1071 -/*
1072 - * Once we have recorded IRQ_PENDING already, we can mask the
1073 - * interrupt for real. This prevents IRQ storms from unhandled
1074 - * devices.
1075 - */
1076 -static void ack_edge_ioapic_irq(unsigned int irq)
1077 -{
1078 -       move_irq(irq);
1079 -       if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
1080 -                                       == (IRQ_PENDING | IRQ_DISABLED))
1081 -               mask_IO_APIC_irq(irq);
1082 -       ack_APIC_irq();
1083 -}
1084 -
1085 -/*
1086 - * Level triggered interrupts can just be masked,
1087 - * and shutting down and starting up the interrupt
1088 - * is the same as enabling and disabling them -- except
1089 - * with a startup need to return a "was pending" value.
1090 - *
1091 - * Level triggered interrupts are special because we
1092 - * do not touch any IO-APIC register while handling
1093 - * them. We ack the APIC in the end-IRQ handler, not
1094 - * in the start-IRQ-handler. Protection against reentrance
1095 - * from the same interrupt is still provided, both by the
1096 - * generic IRQ layer and by the fact that an unacked local
1097 - * APIC does not accept IRQs.
1098 - */
1099 -static unsigned int startup_level_ioapic_irq (unsigned int irq)
1100 +static void ack_ioapic_irq(unsigned int irq)
1101  {
1102 -       unmask_IO_APIC_irq(irq);
1103 -
1104 -       return 0; /* don't check for pending */
1105 +       move_native_irq(irq);
1106 +       ack_APIC_irq();
1107  }
1108  
1109 -static void end_level_ioapic_irq (unsigned int irq)
1110 +static void ack_ioapic_quirk_irq(unsigned int irq)
1111  {
1112         unsigned long v;
1113         int i;
1114  
1115 -       move_irq(irq);
1116 +       move_native_irq(irq);
1117  /*
1118   * It appears there is an erratum which affects at least version 0x11
1119   * of I/O APIC (that's the 82093AA and cores integrated into various
1120 @@ -2056,7 +2091,7 @@ static void end_level_ioapic_irq (unsign
1121   * operation to prevent an edge-triggered interrupt escaping meanwhile.
1122   * The idea is from Manfred Spraul.  --macro
1123   */
1124 -       i = IO_APIC_VECTOR(irq);
1125 +       i = irq_vector[irq];
1126  
1127         v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
1128  
1129 @@ -2071,104 +2106,24 @@ static void end_level_ioapic_irq (unsign
1130         }
1131  }
1132  
1133 -#ifdef CONFIG_PCI_MSI
1134 -static unsigned int startup_edge_ioapic_vector(unsigned int vector)
1135 -{
1136 -       int irq = vector_to_irq(vector);
1137 -
1138 -       return startup_edge_ioapic_irq(irq);
1139 -}
1140 -
1141 -static void ack_edge_ioapic_vector(unsigned int vector)
1142 -{
1143 -       int irq = vector_to_irq(vector);
1144 -
1145 -       move_native_irq(vector);
1146 -       ack_edge_ioapic_irq(irq);
1147 -}
1148 -
1149 -static unsigned int startup_level_ioapic_vector (unsigned int vector)
1150 -{
1151 -       int irq = vector_to_irq(vector);
1152 -
1153 -       return startup_level_ioapic_irq (irq);
1154 -}
1155 -
1156 -static void end_level_ioapic_vector (unsigned int vector)
1157 -{
1158 -       int irq = vector_to_irq(vector);
1159 -
1160 -       move_native_irq(vector);
1161 -       end_level_ioapic_irq(irq);
1162 -}
1163 -
1164 -static void mask_IO_APIC_vector (unsigned int vector)
1165 -{
1166 -       int irq = vector_to_irq(vector);
1167 -
1168 -       mask_IO_APIC_irq(irq);
1169 -}
1170 -
1171 -static void unmask_IO_APIC_vector (unsigned int vector)
1172 +static int ioapic_retrigger_irq(unsigned int irq)
1173  {
1174 -       int irq = vector_to_irq(vector);
1175 -
1176 -       unmask_IO_APIC_irq(irq);
1177 -}
1178 -
1179 -#ifdef CONFIG_SMP
1180 -static void set_ioapic_affinity_vector (unsigned int vector,
1181 -                                       cpumask_t cpu_mask)
1182 -{
1183 -       int irq = vector_to_irq(vector);
1184 -
1185 -       set_native_irq_info(vector, cpu_mask);
1186 -       set_ioapic_affinity_irq(irq, cpu_mask);
1187 -}
1188 -#endif
1189 -#endif
1190 -
1191 -static int ioapic_retrigger(unsigned int irq)
1192 -{
1193 -       send_IPI_self(IO_APIC_VECTOR(irq));
1194 +       send_IPI_self(irq_vector[irq]);
1195  
1196         return 1;
1197  }
1198  
1199 -/*
1200 - * Level and edge triggered IO-APIC interrupts need different handling,
1201 - * so we use two separate IRQ descriptors. Edge triggered IRQs can be
1202 - * handled with the level-triggered descriptor, but that one has slightly
1203 - * more overhead. Level-triggered interrupts cannot be handled with the
1204 - * edge-triggered handler, without risking IRQ storms and other ugly
1205 - * races.
1206 - */
1207 -static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
1208 -       .typename       = "IO-APIC-edge",
1209 -       .startup        = startup_edge_ioapic,
1210 -       .shutdown       = shutdown_edge_ioapic,
1211 -       .enable         = enable_edge_ioapic,
1212 -       .disable        = disable_edge_ioapic,
1213 -       .ack            = ack_edge_ioapic,
1214 -       .end            = end_edge_ioapic,
1215 -#ifdef CONFIG_SMP
1216 -       .set_affinity   = set_ioapic_affinity,
1217 -#endif
1218 -       .retrigger      = ioapic_retrigger,
1219 -};
1220 -
1221 -static struct hw_interrupt_type ioapic_level_type __read_mostly = {
1222 -       .typename       = "IO-APIC-level",
1223 -       .startup        = startup_level_ioapic,
1224 -       .shutdown       = shutdown_level_ioapic,
1225 -       .enable         = enable_level_ioapic,
1226 -       .disable        = disable_level_ioapic,
1227 -       .ack            = mask_and_ack_level_ioapic,
1228 -       .end            = end_level_ioapic,
1229 +static struct irq_chip ioapic_chip __read_mostly = {
1230 +       .name           = "IO-APIC",
1231 +       .startup        = startup_ioapic_irq,
1232 +       .mask           = mask_IO_APIC_irq,
1233 +       .unmask         = unmask_IO_APIC_irq,
1234 +       .ack            = ack_ioapic_irq,
1235 +       .eoi            = ack_ioapic_quirk_irq,
1236  #ifdef CONFIG_SMP
1237 -       .set_affinity   = set_ioapic_affinity,
1238 +       .set_affinity   = set_ioapic_affinity_irq,
1239  #endif
1240 -       .retrigger      = ioapic_retrigger,
1241 +       .retrigger      = ioapic_retrigger_irq,
1242  };
1243  #endif /* !CONFIG_XEN */
1244  
1245 @@ -2189,12 +2144,7 @@ static inline void init_IO_APIC_traps(vo
1246          */
1247         for (irq = 0; irq < NR_IRQS ; irq++) {
1248                 int tmp = irq;
1249 -               if (use_pci_vector()) {
1250 -                       if (!platform_legacy_irq(tmp))
1251 -                               if ((tmp = vector_to_irq(tmp)) == -1)
1252 -                                       continue;
1253 -               }
1254 -               if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
1255 +               if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
1256                         /*
1257                          * Hmm.. We don't have an entry for this,
1258                          * so default to an old-fashioned 8259
1259 @@ -2205,22 +2155,23 @@ static inline void init_IO_APIC_traps(vo
1260  #ifndef CONFIG_XEN
1261                         else
1262                                 /* Strange. Oh, well.. */
1263 -                               irq_desc[irq].chip = &no_irq_type;
1264 +                               irq_desc[irq].chip = &no_irq_chip;
1265  #endif
1266                 }
1267         }
1268  }
1269  
1270  #ifndef CONFIG_XEN
1271 -static void enable_lapic_irq (unsigned int irq)
1272 -{
1273 -       unsigned long v;
1274 +/*
1275 + * The local APIC irq-chip implementation:
1276 + */
1277  
1278 -       v = apic_read(APIC_LVT0);
1279 -       apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
1280 +static void ack_apic(unsigned int irq)
1281 +{
1282 +       ack_APIC_irq();
1283  }
1284  
1285 -static void disable_lapic_irq (unsigned int irq)
1286 +static void mask_lapic_irq (unsigned int irq)
1287  {
1288         unsigned long v;
1289  
1290 @@ -2228,21 +2179,19 @@ static void disable_lapic_irq (unsigned 
1291         apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
1292  }
1293  
1294 -static void ack_lapic_irq (unsigned int irq)
1295 +static void unmask_lapic_irq (unsigned int irq)
1296  {
1297 -       ack_APIC_irq();
1298 -}
1299 +       unsigned long v;
1300  
1301 -static void end_lapic_irq (unsigned int i) { /* nothing */ }
1302 +       v = apic_read(APIC_LVT0);
1303 +       apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
1304 +}
1305  
1306 -static struct hw_interrupt_type lapic_irq_type __read_mostly = {
1307 -       .typename       = "local-APIC-edge",
1308 -       .startup        = NULL, /* startup_irq() not used for IRQ0 */
1309 -       .shutdown       = NULL, /* shutdown_irq() not used for IRQ0 */
1310 -       .enable         = enable_lapic_irq,
1311 -       .disable        = disable_lapic_irq,
1312 -       .ack            = ack_lapic_irq,
1313 -       .end            = end_lapic_irq
1314 +static struct irq_chip lapic_chip __read_mostly = {
1315 +       .name           = "local-APIC-edge",
1316 +       .mask           = mask_lapic_irq,
1317 +       .unmask         = unmask_lapic_irq,
1318 +       .eoi            = ack_apic,
1319  };
1320  
1321  static void setup_nmi (void)
1322 @@ -2275,17 +2224,13 @@ static inline void unlock_ExtINT_logic(v
1323         int apic, pin, i;
1324         struct IO_APIC_route_entry entry0, entry1;
1325         unsigned char save_control, save_freq_select;
1326 -       unsigned long flags;
1327  
1328         pin  = find_isa_irq_pin(8, mp_INT);
1329         apic = find_isa_irq_apic(8, mp_INT);
1330         if (pin == -1)
1331                 return;
1332  
1333 -       spin_lock_irqsave(&ioapic_lock, flags);
1334 -       *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1335 -       *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1336 -       spin_unlock_irqrestore(&ioapic_lock, flags);
1337 +       entry0 = ioapic_read_entry(apic, pin);
1338         clear_IO_APIC_pin(apic, pin);
1339  
1340         memset(&entry1, 0, sizeof(entry1));
1341 @@ -2298,10 +2243,7 @@ static inline void unlock_ExtINT_logic(v
1342         entry1.trigger = 0;
1343         entry1.vector = 0;
1344  
1345 -       spin_lock_irqsave(&ioapic_lock, flags);
1346 -       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
1347 -       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
1348 -       spin_unlock_irqrestore(&ioapic_lock, flags);
1349 +       ioapic_write_entry(apic, pin, entry1);
1350  
1351         save_control = CMOS_READ(RTC_CONTROL);
1352         save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
1353 @@ -2320,10 +2262,7 @@ static inline void unlock_ExtINT_logic(v
1354         CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
1355         clear_IO_APIC_pin(apic, pin);
1356  
1357 -       spin_lock_irqsave(&ioapic_lock, flags);
1358 -       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
1359 -       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
1360 -       spin_unlock_irqrestore(&ioapic_lock, flags);
1361 +       ioapic_write_entry(apic, pin, entry0);
1362  }
1363  
1364  int timer_uses_ioapic_pin_0;
1365 @@ -2423,7 +2362,8 @@ static inline void check_timer(void)
1366         printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
1367  
1368         disable_8259A_irq(0);
1369 -       irq_desc[0].chip = &lapic_irq_type;
1370 +       set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
1371 +                                     "fasteio");
1372         apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);   /* Fixed mode */
1373         enable_8259A_irq(0);
1374  
1375 @@ -2537,17 +2477,12 @@ static int ioapic_suspend(struct sys_dev
1376  {
1377         struct IO_APIC_route_entry *entry;
1378         struct sysfs_ioapic_data *data;
1379 -       unsigned long flags;
1380         int i;
1381         
1382         data = container_of(dev, struct sysfs_ioapic_data, dev);
1383         entry = data->entry;
1384 -       spin_lock_irqsave(&ioapic_lock, flags);
1385 -       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
1386 -               *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
1387 -               *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
1388 -       }
1389 -       spin_unlock_irqrestore(&ioapic_lock, flags);
1390 +       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
1391 +               entry[i] = ioapic_read_entry(dev->id, i);
1392  
1393         return 0;
1394  }
1395 @@ -2569,11 +2504,9 @@ static int ioapic_resume(struct sys_devi
1396                 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
1397                 io_apic_write(dev->id, 0, reg_00.raw);
1398         }
1399 -       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
1400 -               io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
1401 -               io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
1402 -       }
1403         spin_unlock_irqrestore(&ioapic_lock, flags);
1404 +       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
1405 +               ioapic_write_entry(dev->id, i, entry[i]);
1406  
1407         return 0;
1408  }
1409 @@ -2619,8 +2552,240 @@ static int __init ioapic_init_sysfs(void
1410  
1411  device_initcall(ioapic_init_sysfs);
1412  
1413 +/*
1414 + * Dynamic irq allocation and deallocation
1415 + */
1416 +int create_irq(void)
1417 +{
1418 +       /* Allocate an unused irq */
1419 +       int irq, new, vector;
1420 +       unsigned long flags;
1421 +
1422 +       irq = -ENOSPC;
1423 +       spin_lock_irqsave(&vector_lock, flags);
1424 +       for (new = (NR_IRQS - 1); new >= 0; new--) {
1425 +               if (platform_legacy_irq(new))
1426 +                       continue;
1427 +               if (irq_vector[new] != 0)
1428 +                       continue;
1429 +               vector = __assign_irq_vector(new);
1430 +               if (likely(vector > 0))
1431 +                       irq = new;
1432 +               break;
1433 +       }
1434 +       spin_unlock_irqrestore(&vector_lock, flags);
1435 +
1436 +       if (irq >= 0) {
1437 +               set_intr_gate(vector, interrupt[irq]);
1438 +               dynamic_irq_init(irq);
1439 +       }
1440 +       return irq;
1441 +}
1442 +
1443 +void destroy_irq(unsigned int irq)
1444 +{
1445 +       unsigned long flags;
1446 +
1447 +       dynamic_irq_cleanup(irq);
1448 +
1449 +       spin_lock_irqsave(&vector_lock, flags);
1450 +       irq_vector[irq] = 0;
1451 +       spin_unlock_irqrestore(&vector_lock, flags);
1452 +}
1453 +
1454  #endif /* CONFIG_XEN */
1455  
1456 +/*
1457 + * MSI message composition
1458 + */
1459 +#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
1460 +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
1461 +{
1462 +       int vector;
1463 +       unsigned dest;
1464 +
1465 +       vector = assign_irq_vector(irq);
1466 +       if (vector >= 0) {
1467 +               dest = cpu_mask_to_apicid(TARGET_CPUS);
1468 +
1469 +               msg->address_hi = MSI_ADDR_BASE_HI;
1470 +               msg->address_lo =
1471 +                       MSI_ADDR_BASE_LO |
1472 +                       ((INT_DEST_MODE == 0) ?
1473 +                               MSI_ADDR_DEST_MODE_PHYSICAL:
1474 +                               MSI_ADDR_DEST_MODE_LOGICAL) |
1475 +                       ((INT_DELIVERY_MODE != dest_LowestPrio) ?
1476 +                               MSI_ADDR_REDIRECTION_CPU:
1477 +                               MSI_ADDR_REDIRECTION_LOWPRI) |
1478 +                       MSI_ADDR_DEST_ID(dest);
1479 +
1480 +               msg->data =
1481 +                       MSI_DATA_TRIGGER_EDGE |
1482 +                       MSI_DATA_LEVEL_ASSERT |
1483 +                       ((INT_DELIVERY_MODE != dest_LowestPrio) ?
1484 +                               MSI_DATA_DELIVERY_FIXED:
1485 +                               MSI_DATA_DELIVERY_LOWPRI) |
1486 +                       MSI_DATA_VECTOR(vector);
1487 +       }
1488 +       return vector;
1489 +}
1490 +
1491 +#ifdef CONFIG_SMP
1492 +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
1493 +{
1494 +       struct msi_msg msg;
1495 +       unsigned int dest;
1496 +       cpumask_t tmp;
1497 +       int vector;
1498 +
1499 +       cpus_and(tmp, mask, cpu_online_map);
1500 +       if (cpus_empty(tmp))
1501 +               tmp = TARGET_CPUS;
1502 +
1503 +       vector = assign_irq_vector(irq);
1504 +       if (vector < 0)
1505 +               return;
1506 +
1507 +       dest = cpu_mask_to_apicid(mask);
1508 +
1509 +       read_msi_msg(irq, &msg);
1510 +
1511 +       msg.data &= ~MSI_DATA_VECTOR_MASK;
1512 +       msg.data |= MSI_DATA_VECTOR(vector);
1513 +       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
1514 +       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
1515 +
1516 +       write_msi_msg(irq, &msg);
1517 +       set_native_irq_info(irq, mask);
1518 +}
1519 +#endif /* CONFIG_SMP */
1520 +
1521 +/*
1522 + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
1523 + * which implement the MSI or MSI-X Capability Structure.
1524 + */
1525 +static struct irq_chip msi_chip = {
1526 +       .name           = "PCI-MSI",
1527 +       .unmask         = unmask_msi_irq,
1528 +       .mask           = mask_msi_irq,
1529 +       .ack            = ack_ioapic_irq,
1530 +#ifdef CONFIG_SMP
1531 +       .set_affinity   = set_msi_irq_affinity,
1532 +#endif
1533 +       .retrigger      = ioapic_retrigger_irq,
1534 +};
1535 +
1536 +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
1537 +{
1538 +       struct msi_msg msg;
1539 +       int ret;
1540 +       ret = msi_compose_msg(dev, irq, &msg);
1541 +       if (ret < 0)
1542 +               return ret;
1543 +
1544 +       write_msi_msg(irq, &msg);
1545 +
1546 +       set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
1547 +                                     "edge");
1548 +
1549 +       return 0;
1550 +}
1551 +
1552 +void arch_teardown_msi_irq(unsigned int irq)
1553 +{
1554 +       return;
1555 +}
1556 +
1557 +#endif /* CONFIG_PCI_MSI */
1558 +
1559 +/*
1560 + * Hypertransport interrupt support
1561 + */
1562 +#ifdef CONFIG_HT_IRQ
1563 +
1564 +#ifdef CONFIG_SMP
1565 +
1566 +static void target_ht_irq(unsigned int irq, unsigned int dest)
1567 +{
1568 +       struct ht_irq_msg msg;
1569 +       fetch_ht_irq_msg(irq, &msg);
1570 +
1571 +       msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
1572 +       msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
1573 +
1574 +       msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
1575 +       msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
1576 +
1577 +       write_ht_irq_msg(irq, &msg);
1578 +}
1579 +
1580 +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
1581 +{
1582 +       unsigned int dest;
1583 +       cpumask_t tmp;
1584 +
1585 +       cpus_and(tmp, mask, cpu_online_map);
1586 +       if (cpus_empty(tmp))
1587 +               tmp = TARGET_CPUS;
1588 +
1589 +       cpus_and(mask, tmp, CPU_MASK_ALL);
1590 +
1591 +       dest = cpu_mask_to_apicid(mask);
1592 +
1593 +       target_ht_irq(irq, dest);
1594 +       set_native_irq_info(irq, mask);
1595 +}
1596 +#endif
1597 +
1598 +static struct irq_chip ht_irq_chip = {
1599 +       .name           = "PCI-HT",
1600 +       .mask           = mask_ht_irq,
1601 +       .unmask         = unmask_ht_irq,
1602 +       .ack            = ack_ioapic_irq,
1603 +#ifdef CONFIG_SMP
1604 +       .set_affinity   = set_ht_irq_affinity,
1605 +#endif
1606 +       .retrigger      = ioapic_retrigger_irq,
1607 +};
1608 +
1609 +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
1610 +{
1611 +       int vector;
1612 +
1613 +       vector = assign_irq_vector(irq);
1614 +       if (vector >= 0) {
1615 +               struct ht_irq_msg msg;
1616 +               unsigned dest;
1617 +               cpumask_t tmp;
1618 +
1619 +               cpus_clear(tmp);
1620 +               cpu_set(vector >> 8, tmp);
1621 +               dest = cpu_mask_to_apicid(tmp);
1622 +
1623 +               msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
1624 +
1625 +               msg.address_lo =
1626 +                       HT_IRQ_LOW_BASE |
1627 +                       HT_IRQ_LOW_DEST_ID(dest) |
1628 +                       HT_IRQ_LOW_VECTOR(vector) |
1629 +                       ((INT_DEST_MODE == 0) ?
1630 +                               HT_IRQ_LOW_DM_PHYSICAL :
1631 +                               HT_IRQ_LOW_DM_LOGICAL) |
1632 +                       HT_IRQ_LOW_RQEOI_EDGE |
1633 +                       ((INT_DELIVERY_MODE != dest_LowestPrio) ?
1634 +                               HT_IRQ_LOW_MT_FIXED :
1635 +                               HT_IRQ_LOW_MT_ARBITRATED) |
1636 +                       HT_IRQ_LOW_IRQ_MASKED;
1637 +
1638 +               write_ht_irq_msg(irq, &msg);
1639 +
1640 +               set_irq_chip_and_handler_name(irq, &ht_irq_chip,
1641 +                                             handle_edge_irq, "edge");
1642 +       }
1643 +       return vector;
1644 +}
1645 +#endif /* CONFIG_HT_IRQ */
1646 +
1647  /* --------------------------------------------------------------------------
1648                            ACPI-based IOAPIC Configuration
1649     -------------------------------------------------------------------------- */
1650 @@ -2774,13 +2939,34 @@ int io_apic_set_pci_routing (int ioapic,
1651         if (!ioapic && (irq < 16))
1652                 disable_8259A_irq(irq);
1653  
1654 +       ioapic_write_entry(ioapic, pin, entry);
1655         spin_lock_irqsave(&ioapic_lock, flags);
1656 -       io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
1657 -       io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
1658 -       set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
1659 +       set_native_irq_info(irq, TARGET_CPUS);
1660         spin_unlock_irqrestore(&ioapic_lock, flags);
1661  
1662         return 0;
1663  }
1664  
1665  #endif /* CONFIG_ACPI */
1666 +
1667 +static int __init parse_disable_timer_pin_1(char *arg)
1668 +{
1669 +       disable_timer_pin_1 = 1;
1670 +       return 0;
1671 +}
1672 +early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
1673 +
1674 +static int __init parse_enable_timer_pin_1(char *arg)
1675 +{
1676 +       disable_timer_pin_1 = -1;
1677 +       return 0;
1678 +}
1679 +early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
1680 +
1681 +static int __init parse_noapic(char *arg)
1682 +{
1683 +       /* disable IO-APIC */
1684 +       disable_ioapic_setup();
1685 +       return 0;
1686 +}
1687 +early_param("noapic", parse_noapic);
1688 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/irq_32-xen.c       2008-10-29 09:55:56.000000000 +0100
1689 +++ sle11sp1-2010-03-29/arch/x86/kernel/irq_32-xen.c    2009-11-06 10:45:48.000000000 +0100
1690 @@ -53,8 +53,10 @@ static union irq_ctx *softirq_ctx[NR_CPU
1691   */
1692  fastcall unsigned int do_IRQ(struct pt_regs *regs)
1693  {      
1694 +       struct pt_regs *old_regs;
1695         /* high bit used in ret_from_ code */
1696         int irq = ~regs->orig_eax;
1697 +       struct irq_desc *desc = irq_desc + irq;
1698  #ifdef CONFIG_4KSTACKS
1699         union irq_ctx *curctx, *irqctx;
1700         u32 *isp;
1701 @@ -66,6 +68,7 @@ fastcall unsigned int do_IRQ(struct pt_r
1702                 BUG();
1703         }
1704  
1705 +       old_regs = set_irq_regs(regs);
1706         /*irq_enter();*/
1707  #ifdef CONFIG_DEBUG_STACKOVERFLOW
1708         /* Debugging check for stack overflow: is there less than 1KB free? */
1709 @@ -110,19 +113,20 @@ fastcall unsigned int do_IRQ(struct pt_r
1710                         (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
1711  
1712                 asm volatile(
1713 -                       "       xchgl   %%ebx,%%esp      \n"
1714 -                       "       call    __do_IRQ         \n"
1715 +                       "       xchgl  %%ebx,%%esp      \n"
1716 +                       "       call   *%%edi           \n"
1717                         "       movl   %%ebx,%%esp      \n"
1718                         : "=a" (arg1), "=d" (arg2), "=b" (ebx)
1719 -                       :  "0" (irq),   "1" (regs),  "2" (isp)
1720 -                       : "memory", "cc", "ecx"
1721 +                       :  "0" (irq),   "1" (desc),  "2" (isp),
1722 +                          "D" (desc->handle_irq)
1723 +                       : "memory", "cc"
1724                 );
1725         } else
1726  #endif
1727 -               __do_IRQ(irq, regs);
1728 +               desc->handle_irq(irq, desc);
1729  
1730         /*irq_exit();*/
1731 -
1732 +       set_irq_regs(old_regs);
1733         return 1;
1734  }
1735  
1736 @@ -253,7 +257,8 @@ int show_interrupts(struct seq_file *p, 
1737                 for_each_online_cpu(j)
1738                         seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
1739  #endif
1740 -               seq_printf(p, " %14s", irq_desc[i].chip->typename);
1741 +               seq_printf(p, " %8s", irq_desc[i].chip->name);
1742 +               seq_printf(p, "-%-8s", irq_desc[i].name);
1743                 seq_printf(p, "  %s", action->name);
1744  
1745                 for (action=action->next; action; action = action->next)
1746 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/ldt_32-xen.c       2007-06-12 13:12:48.000000000 +0200
1747 +++ sle11sp1-2010-03-29/arch/x86/kernel/ldt_32-xen.c    2009-11-06 10:45:48.000000000 +0100
1748 @@ -1,5 +1,5 @@
1749  /*
1750 - * linux/kernel/ldt.c
1751 + * linux/arch/i386/kernel/ldt.c
1752   *
1753   * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
1754   * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
1755 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/microcode-xen.c    2007-06-12 13:12:48.000000000 +0200
1756 +++ sle11sp1-2010-03-29/arch/x86/kernel/microcode-xen.c 2009-11-06 10:45:48.000000000 +0100
1757 @@ -2,6 +2,7 @@
1758   *     Intel CPU Microcode Update Driver for Linux
1759   *
1760   *     Copyright (C) 2000-2004 Tigran Aivazian
1761 + *                   2006      Shaohua Li <shaohua.li@intel.com>
1762   *
1763   *     This driver allows to upgrade microcode on Intel processors
1764   *     belonging to IA-32 family - PentiumPro, Pentium II, 
1765 @@ -33,7 +34,9 @@
1766  #include <linux/spinlock.h>
1767  #include <linux/mm.h>
1768  #include <linux/mutex.h>
1769 -#include <linux/syscalls.h>
1770 +#include <linux/cpu.h>
1771 +#include <linux/firmware.h>
1772 +#include <linux/platform_device.h>
1773  
1774  #include <asm/msr.h>
1775  #include <asm/uaccess.h>
1776 @@ -55,12 +58,7 @@ module_param(verbose, int, 0644);
1777  /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
1778  static DEFINE_MUTEX(microcode_mutex);
1779                                 
1780 -static int microcode_open (struct inode *unused1, struct file *unused2)
1781 -{
1782 -       return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
1783 -}
1784 -
1785 -
1786 +#ifdef CONFIG_MICROCODE_OLD_INTERFACE
1787  static int do_microcode_update (const void __user *ubuf, size_t len)
1788  {
1789         int err;
1790 @@ -85,6 +83,11 @@ static int do_microcode_update (const vo
1791         return err;
1792  }
1793  
1794 +static int microcode_open (struct inode *unused1, struct file *unused2)
1795 +{
1796 +       return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
1797 +}
1798 +
1799  static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
1800  {
1801         ssize_t ret;
1802 @@ -117,7 +120,7 @@ static struct miscdevice microcode_dev =
1803         .fops           = &microcode_fops,
1804  };
1805  
1806 -static int __init microcode_init (void)
1807 +static int __init microcode_dev_init (void)
1808  {
1809         int error;
1810  
1811 @@ -129,6 +132,68 @@ static int __init microcode_init (void)
1812                 return error;
1813         }
1814  
1815 +       return 0;
1816 +}
1817 +
1818 +static void __exit microcode_dev_exit (void)
1819 +{
1820 +       misc_deregister(&microcode_dev);
1821 +}
1822 +
1823 +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
1824 +#else
1825 +#define microcode_dev_init() 0
1826 +#define microcode_dev_exit() do { } while(0)
1827 +#endif
1828 +
1829 +/* fake device for request_firmware */
1830 +static struct platform_device *microcode_pdev;
1831 +
1832 +static int request_microcode(void)
1833 +{
1834 +       char name[30];
1835 +       const struct cpuinfo_x86 *c = &boot_cpu_data;
1836 +       const struct firmware *firmware;
1837 +       int error;
1838 +       struct xen_platform_op op;
1839 +
1840 +       sprintf(name,"intel-ucode/%02x-%02x-%02x",
1841 +               c->x86, c->x86_model, c->x86_mask);
1842 +       error = request_firmware(&firmware, name, &microcode_pdev->dev);
1843 +       if (error) {
1844 +               pr_debug("ucode data file %s load failed\n", name);
1845 +               return error;
1846 +       }
1847 +
1848 +       op.cmd = XENPF_microcode_update;
1849 +       set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data);
1850 +       op.u.microcode.length = firmware->size;
1851 +       error = HYPERVISOR_platform_op(&op);
1852 +
1853 +       release_firmware(firmware);
1854 +
1855 +       if (error)
1856 +               pr_debug("ucode load failed\n");
1857 +
1858 +       return error;
1859 +}
1860 +
1861 +static int __init microcode_init (void)
1862 +{
1863 +       int error;
1864 +
1865 +       error = microcode_dev_init();
1866 +       if (error)
1867 +               return error;
1868 +       microcode_pdev = platform_device_register_simple("microcode", -1,
1869 +                                                        NULL, 0);
1870 +       if (IS_ERR(microcode_pdev)) {
1871 +               microcode_dev_exit();
1872 +               return PTR_ERR(microcode_pdev);
1873 +       }
1874 +
1875 +       request_microcode();
1876 +
1877         printk(KERN_INFO 
1878                 "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
1879         return 0;
1880 @@ -136,9 +201,9 @@ static int __init microcode_init (void)
1881  
1882  static void __exit microcode_exit (void)
1883  {
1884 -       misc_deregister(&microcode_dev);
1885 +       microcode_dev_exit();
1886 +       platform_device_unregister(microcode_pdev);
1887  }
1888  
1889  module_init(microcode_init)
1890  module_exit(microcode_exit)
1891 -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
1892 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/mpparse_32-xen.c   2007-06-12 13:12:48.000000000 +0200
1893 +++ sle11sp1-2010-03-29/arch/x86/kernel/mpparse_32-xen.c        2009-11-06 10:45:48.000000000 +0100
1894 @@ -30,6 +30,7 @@
1895  #include <asm/io_apic.h>
1896  
1897  #include <mach_apic.h>
1898 +#include <mach_apicdef.h>
1899  #include <mach_mpparse.h>
1900  #include <bios_ebda.h>
1901  
1902 @@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
1903  /* Processor that is doing the boot up */
1904  unsigned int boot_cpu_physical_apicid = -1U;
1905  /* Internal processor count */
1906 -static unsigned int __devinitdata num_processors;
1907 +unsigned int __cpuinitdata num_processors;
1908  
1909  /* Bitmask of physically existing CPUs */
1910  physid_mask_t phys_cpu_present_map;
1911 @@ -235,12 +236,14 @@ static void __init MP_bus_info (struct m
1912  
1913         mpc_oem_bus_info(m, str, translation_table[mpc_record]);
1914  
1915 +#if MAX_MP_BUSSES < 256
1916         if (m->mpc_busid >= MAX_MP_BUSSES) {
1917                 printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
1918                         " is too large, max. supported is %d\n",
1919                         m->mpc_busid, str, MAX_MP_BUSSES - 1);
1920                 return;
1921         }
1922 +#endif
1923  
1924         if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
1925                 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
1926 @@ -300,19 +303,6 @@ static void __init MP_lintsrc_info (stru
1927                         m->mpc_irqtype, m->mpc_irqflag & 3,
1928                         (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
1929                         m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
1930 -       /*
1931 -        * Well it seems all SMP boards in existence
1932 -        * use ExtINT/LVT1 == LINT0 and
1933 -        * NMI/LVT2 == LINT1 - the following check
1934 -        * will show us if this assumptions is false.
1935 -        * Until then we do not have to add baggage.
1936 -        */
1937 -       if ((m->mpc_irqtype == mp_ExtINT) &&
1938 -               (m->mpc_destapiclint != 0))
1939 -                       BUG();
1940 -       if ((m->mpc_irqtype == mp_NMI) &&
1941 -               (m->mpc_destapiclint != 1))
1942 -                       BUG();
1943  }
1944  
1945  #ifdef CONFIG_X86_NUMAQ
1946 @@ -838,8 +828,7 @@ int es7000_plat;
1947  
1948  #ifdef CONFIG_ACPI
1949  
1950 -void __init mp_register_lapic_address (
1951 -       u64                     address)
1952 +void __init mp_register_lapic_address(u64 address)
1953  {
1954  #ifndef CONFIG_XEN
1955         mp_lapic_addr = (unsigned long) address;
1956 @@ -853,13 +842,10 @@ void __init mp_register_lapic_address (
1957  #endif
1958  }
1959  
1960 -
1961 -void __devinit mp_register_lapic (
1962 -       u8                      id, 
1963 -       u8                      enabled)
1964 +void __devinit mp_register_lapic (u8 id, u8 enabled)
1965  {
1966         struct mpc_config_processor processor;
1967 -       int                     boot_cpu = 0;
1968 +       int boot_cpu = 0;
1969         
1970         if (MAX_APICS - id <= 0) {
1971                 printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
1972 @@ -898,11 +884,9 @@ static struct mp_ioapic_routing {
1973         u32                     pin_programmed[4];
1974  } mp_ioapic_routing[MAX_IO_APICS];
1975  
1976 -
1977 -static int mp_find_ioapic (
1978 -       int                     gsi)
1979 +static int mp_find_ioapic (int gsi)
1980  {
1981 -       int                     i = 0;
1982 +       int i = 0;
1983  
1984         /* Find the IOAPIC that manages this GSI. */
1985         for (i = 0; i < nr_ioapics; i++) {
1986 @@ -915,15 +899,11 @@ static int mp_find_ioapic (
1987  
1988         return -1;
1989  }
1990 -       
1991  
1992 -void __init mp_register_ioapic (
1993 -       u8                      id, 
1994 -       u32                     address,
1995 -       u32                     gsi_base)
1996 +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
1997  {
1998 -       int                     idx = 0;
1999 -       int                     tmpid;
2000 +       int idx = 0;
2001 +       int tmpid;
2002  
2003         if (nr_ioapics >= MAX_IO_APICS) {
2004                 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
2005 @@ -971,16 +951,10 @@ void __init mp_register_ioapic (
2006                 mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
2007                 mp_ioapic_routing[idx].gsi_base,
2008                 mp_ioapic_routing[idx].gsi_end);
2009 -
2010 -       return;
2011  }
2012  
2013 -
2014 -void __init mp_override_legacy_irq (
2015 -       u8                      bus_irq,
2016 -       u8                      polarity, 
2017 -       u8                      trigger, 
2018 -       u32                     gsi)
2019 +void __init
2020 +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
2021  {
2022         struct mpc_config_intsrc intsrc;
2023         int                     ioapic = -1;
2024 @@ -1018,15 +992,13 @@ void __init mp_override_legacy_irq (
2025         mp_irqs[mp_irq_entries] = intsrc;
2026         if (++mp_irq_entries == MAX_IRQ_SOURCES)
2027                 panic("Max # of irq sources exceeded!\n");
2028 -
2029 -       return;
2030  }
2031  
2032  void __init mp_config_acpi_legacy_irqs (void)
2033  {
2034         struct mpc_config_intsrc intsrc;
2035 -       int                     i = 0;
2036 -       int                     ioapic = -1;
2037 +       int i = 0;
2038 +       int ioapic = -1;
2039  
2040         /* 
2041          * Fabricate the legacy ISA bus (bus #31).
2042 @@ -1095,12 +1067,12 @@ void __init mp_config_acpi_legacy_irqs (
2043  
2044  #define MAX_GSI_NUM    4096
2045  
2046 -int mp_register_gsi (u32 gsi, int triggering, int polarity)
2047 +int mp_register_gsi(u32 gsi, int triggering, int polarity)
2048  {
2049 -       int                     ioapic = -1;
2050 -       int                     ioapic_pin = 0;
2051 -       int                     idx, bit = 0;
2052 -       static int              pci_irq = 16;
2053 +       int ioapic = -1;
2054 +       int ioapic_pin = 0;
2055 +       int idx, bit = 0;
2056 +       static int pci_irq = 16;
2057         /*
2058          * Mapping between Global System Interrups, which
2059          * represent all possible interrupts, and IRQs
2060 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/pci-dma-xen.c      2009-11-06 10:23:23.000000000 +0100
2061 +++ sle11sp1-2010-03-29/arch/x86/kernel/pci-dma-xen.c   2009-11-06 10:45:48.000000000 +0100
2062 @@ -110,8 +110,7 @@ dma_map_sg(struct device *hwdev, struct 
2063  {
2064         int i, rc;
2065  
2066 -       if (direction == DMA_NONE)
2067 -               BUG();
2068 +       BUG_ON(!valid_dma_direction(direction));
2069         WARN_ON(nents == 0 || sg[0].length == 0);
2070  
2071         if (swiotlb) {
2072 @@ -142,7 +141,7 @@ dma_unmap_sg(struct device *hwdev, struc
2073  {
2074         int i;
2075  
2076 -       BUG_ON(direction == DMA_NONE);
2077 +       BUG_ON(!valid_dma_direction(direction));
2078         if (swiotlb)
2079                 swiotlb_unmap_sg(hwdev, sg, nents, direction);
2080         else {
2081 @@ -159,8 +158,7 @@ dma_map_page(struct device *dev, struct 
2082  {
2083         dma_addr_t dma_addr;
2084  
2085 -       BUG_ON(direction == DMA_NONE);
2086 -
2087 +       BUG_ON(!valid_dma_direction(direction));
2088         if (swiotlb) {
2089                 dma_addr = swiotlb_map_page(
2090                         dev, page, offset, size, direction);
2091 @@ -177,7 +175,7 @@ void
2092  dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
2093                enum dma_data_direction direction)
2094  {
2095 -       BUG_ON(direction == DMA_NONE);
2096 +       BUG_ON(!valid_dma_direction(direction));
2097         if (swiotlb)
2098                 swiotlb_unmap_page(dev, dma_address, size, direction);
2099         else
2100 @@ -356,8 +354,7 @@ dma_map_single(struct device *dev, void 
2101  {
2102         dma_addr_t dma;
2103  
2104 -       if (direction == DMA_NONE)
2105 -               BUG();
2106 +       BUG_ON(!valid_dma_direction(direction));
2107         WARN_ON(size == 0);
2108  
2109         if (swiotlb) {
2110 @@ -378,8 +375,7 @@ void
2111  dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
2112                  enum dma_data_direction direction)
2113  {
2114 -       if (direction == DMA_NONE)
2115 -               BUG();
2116 +       BUG_ON(!valid_dma_direction(direction));
2117         if (swiotlb)
2118                 swiotlb_unmap_single(dev, dma_addr, size, direction);
2119         else
2120 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/process_32-xen.c   2008-07-21 11:00:32.000000000 +0200
2121 +++ sle11sp1-2010-03-29/arch/x86/kernel/process_32-xen.c        2009-11-06 10:45:48.000000000 +0100
2122 @@ -37,6 +37,7 @@
2123  #include <linux/kallsyms.h>
2124  #include <linux/ptrace.h>
2125  #include <linux/random.h>
2126 +#include <linux/personality.h>
2127  
2128  #include <asm/uaccess.h>
2129  #include <asm/pgtable.h>
2130 @@ -186,7 +187,7 @@ void cpu_idle(void)
2131  void cpu_idle_wait(void)
2132  {
2133         unsigned int cpu, this_cpu = get_cpu();
2134 -       cpumask_t map;
2135 +       cpumask_t map, tmp = current->cpus_allowed;
2136  
2137         set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
2138         put_cpu();
2139 @@ -208,6 +209,8 @@ void cpu_idle_wait(void)
2140                 }
2141                 cpus_and(map, map, cpu_online_map);
2142         } while (!cpus_empty(map));
2143 +
2144 +       set_cpus_allowed(current, tmp);
2145  }
2146  EXPORT_SYMBOL_GPL(cpu_idle_wait);
2147  
2148 @@ -240,9 +243,9 @@ void show_regs(struct pt_regs * regs)
2149         if (user_mode_vm(regs))
2150                 printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
2151         printk(" EFLAGS: %08lx    %s  (%s %.*s)\n",
2152 -              regs->eflags, print_tainted(), system_utsname.release,
2153 -              (int)strcspn(system_utsname.version, " "),
2154 -              system_utsname.version);
2155 +              regs->eflags, print_tainted(), init_utsname()->release,
2156 +              (int)strcspn(init_utsname()->version, " "),
2157 +              init_utsname()->version);
2158         printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
2159                 regs->eax,regs->ebx,regs->ecx,regs->edx);
2160         printk("ESI: %08lx EDI: %08lx EBP: %08lx",
2161 @@ -264,15 +267,6 @@ void show_regs(struct pt_regs * regs)
2162   * the "args".
2163   */
2164  extern void kernel_thread_helper(void);
2165 -__asm__(".section .text\n"
2166 -       ".align 4\n"
2167 -       "kernel_thread_helper:\n\t"
2168 -       "movl %edx,%eax\n\t"
2169 -       "pushl %edx\n\t"
2170 -       "call *%ebx\n\t"
2171 -       "pushl %eax\n\t"
2172 -       "call do_exit\n"
2173 -       ".previous");
2174  
2175  /*
2176   * Create a kernel thread
2177 @@ -290,7 +284,7 @@ int kernel_thread(int (*fn)(void *), voi
2178         regs.xes = __USER_DS;
2179         regs.orig_eax = -1;
2180         regs.eip = (unsigned long) kernel_thread_helper;
2181 -       regs.xcs = GET_KERNEL_CS();
2182 +       regs.xcs = __KERNEL_CS | get_kernel_rpl();
2183         regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
2184  
2185         /* Ok, create the new process.. */
2186 @@ -369,13 +363,12 @@ int copy_thread(int nr, unsigned long cl
2187  
2188         tsk = current;
2189         if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
2190 -               p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
2191 +               p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
2192 +                                               IO_BITMAP_BYTES, GFP_KERNEL);
2193                 if (!p->thread.io_bitmap_ptr) {
2194                         p->thread.io_bitmap_max = 0;
2195                         return -ENOMEM;
2196                 }
2197 -               memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
2198 -                       IO_BITMAP_BYTES);
2199                 set_tsk_thread_flag(p, TIF_IO_BITMAP);
2200         }
2201  
2202 @@ -871,7 +864,7 @@ asmlinkage int sys_get_thread_area(struc
2203  
2204  unsigned long arch_align_stack(unsigned long sp)
2205  {
2206 -       if (randomize_va_space)
2207 +       if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
2208                 sp -= get_random_int() % 8192;
2209         return sp & ~0xf;
2210  }
2211 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/setup_32-xen.c     2008-04-22 15:41:51.000000000 +0200
2212 +++ sle11sp1-2010-03-29/arch/x86/kernel/setup_32-xen.c  2009-11-06 10:45:48.000000000 +0100
2213 @@ -56,6 +56,7 @@
2214  #include <asm/apic.h>
2215  #include <asm/e820.h>
2216  #include <asm/mpspec.h>
2217 +#include <asm/mmzone.h>
2218  #include <asm/setup.h>
2219  #include <asm/arch_hooks.h>
2220  #include <asm/sections.h>
2221 @@ -83,9 +84,6 @@ static struct notifier_block xen_panic_b
2222         xen_panic_event, NULL, 0 /* try to go last */
2223  };
2224  
2225 -extern char hypercall_page[PAGE_SIZE];
2226 -EXPORT_SYMBOL(hypercall_page);
2227 -
2228  int disable_pse __devinitdata = 0;
2229  
2230  /*
2231 @@ -105,18 +103,6 @@ EXPORT_SYMBOL(boot_cpu_data);
2232  
2233  unsigned long mmu_cr4_features;
2234  
2235 -#ifdef CONFIG_ACPI
2236 -       int acpi_disabled = 0;
2237 -#else
2238 -       int acpi_disabled = 1;
2239 -#endif
2240 -EXPORT_SYMBOL(acpi_disabled);
2241 -
2242 -#ifdef CONFIG_ACPI
2243 -int __initdata acpi_force = 0;
2244 -extern acpi_interrupt_flags    acpi_sci_flags;
2245 -#endif
2246 -
2247  /* for MCA, but anyone else can use it if they want */
2248  unsigned int machine_id;
2249  #ifdef CONFIG_MCA
2250 @@ -170,7 +156,6 @@ struct e820map machine_e820;
2251  #endif
2252  
2253  extern void early_cpu_init(void);
2254 -extern void generic_apic_probe(char *);
2255  extern int root_mountflags;
2256  
2257  unsigned long saved_videomode;
2258 @@ -243,9 +228,6 @@ static struct resource adapter_rom_resou
2259         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
2260  } };
2261  
2262 -#define ADAPTER_ROM_RESOURCES \
2263 -       (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
2264 -
2265  static struct resource video_rom_resource = {
2266         .name   = "Video ROM",
2267         .start  = 0xc0000,
2268 @@ -307,9 +289,6 @@ static struct resource standard_io_resou
2269         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
2270  } };
2271  
2272 -#define STANDARD_IO_RESOURCES \
2273 -       (sizeof standard_io_resources / sizeof standard_io_resources[0])
2274 -
2275  #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
2276  
2277  static int __init romchecksum(unsigned char *rom, unsigned long length)
2278 @@ -372,7 +351,7 @@ static void __init probe_roms(void)
2279         }
2280  
2281         /* check for adapter roms on 2k boundaries */
2282 -       for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
2283 +       for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
2284                 rom = isa_bus_to_virt(start);
2285                 if (!romsignature(rom))
2286                         continue;
2287 @@ -779,246 +758,152 @@ static inline void copy_edd(void)
2288  }
2289  #endif
2290  
2291 -static void __init parse_cmdline_early (char ** cmdline_p)
2292 +static int __initdata user_defined_memmap = 0;
2293 +
2294 +/*
2295 + * "mem=nopentium" disables the 4MB page tables.
2296 + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
2297 + * to <mem>, overriding the bios size.
2298 + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
2299 + * <start> to <start>+<mem>, overriding the bios size.
2300 + *
2301 + * HPA tells me bootloaders need to parse mem=, so no new
2302 + * option should be mem=  [also see Documentation/i386/boot.txt]
2303 + */
2304 +static int __init parse_mem(char *arg)
2305  {
2306 -       char c = ' ', *to = command_line, *from = saved_command_line;
2307 -       int len = 0, max_cmdline;
2308 -       int userdef = 0;
2309 -
2310 -       if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
2311 -               max_cmdline = COMMAND_LINE_SIZE;
2312 -       memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
2313 -       /* Save unparsed command line copy for /proc/cmdline */
2314 -       saved_command_line[max_cmdline-1] = '\0';
2315 -
2316 -       for (;;) {
2317 -               if (c != ' ')
2318 -                       goto next_char;
2319 -               /*
2320 -                * "mem=nopentium" disables the 4MB page tables.
2321 -                * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
2322 -                * to <mem>, overriding the bios size.
2323 -                * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
2324 -                * <start> to <start>+<mem>, overriding the bios size.
2325 -                *
2326 -                * HPA tells me bootloaders need to parse mem=, so no new
2327 -                * option should be mem=  [also see Documentation/i386/boot.txt]
2328 -                */
2329 -               if (!memcmp(from, "mem=", 4)) {
2330 -                       if (to != command_line)
2331 -                               to--;
2332 -                       if (!memcmp(from+4, "nopentium", 9)) {
2333 -                               from += 9+4;
2334 -                               clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
2335 -                               disable_pse = 1;
2336 -                       } else {
2337 -                               /* If the user specifies memory size, we
2338 -                                * limit the BIOS-provided memory map to
2339 -                                * that size. exactmap can be used to specify
2340 -                                * the exact map. mem=number can be used to
2341 -                                * trim the existing memory map.
2342 -                                */
2343 -                               unsigned long long mem_size;
2344
2345 -                               mem_size = memparse(from+4, &from);
2346 -                               limit_regions(mem_size);
2347 -                               userdef=1;
2348 -                       }
2349 -               }
2350 +       if (!arg)
2351 +               return -EINVAL;
2352  
2353 -               else if (!memcmp(from, "memmap=", 7)) {
2354 -                       if (to != command_line)
2355 -                               to--;
2356 -                       if (!memcmp(from+7, "exactmap", 8)) {
2357 -#ifdef CONFIG_CRASH_DUMP
2358 -                               /* If we are doing a crash dump, we
2359 -                                * still need to know the real mem
2360 -                                * size before original memory map is
2361 -                                * reset.
2362 -                                */
2363 -                               find_max_pfn();
2364 -                               saved_max_pfn = max_pfn;
2365 -#endif
2366 -                               from += 8+7;
2367 -                               e820.nr_map = 0;
2368 -                               userdef = 1;
2369 -                       } else {
2370 -                               /* If the user specifies memory size, we
2371 -                                * limit the BIOS-provided memory map to
2372 -                                * that size. exactmap can be used to specify
2373 -                                * the exact map. mem=number can be used to
2374 -                                * trim the existing memory map.
2375 -                                */
2376 -                               unsigned long long start_at, mem_size;
2377 +       if (strcmp(arg, "nopentium") == 0) {
2378 +               clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
2379 +               disable_pse = 1;
2380 +       } else {
2381 +               /* If the user specifies memory size, we
2382 +                * limit the BIOS-provided memory map to
2383 +                * that size. exactmap can be used to specify
2384 +                * the exact map. mem=number can be used to
2385 +                * trim the existing memory map.
2386 +                */
2387 +               unsigned long long mem_size;
2388   
2389 -                               mem_size = memparse(from+7, &from);
2390 -                               if (*from == '@') {
2391 -                                       start_at = memparse(from+1, &from);
2392 -                                       add_memory_region(start_at, mem_size, E820_RAM);
2393 -                               } else if (*from == '#') {
2394 -                                       start_at = memparse(from+1, &from);
2395 -                                       add_memory_region(start_at, mem_size, E820_ACPI);
2396 -                               } else if (*from == '$') {
2397 -                                       start_at = memparse(from+1, &from);
2398 -                                       add_memory_region(start_at, mem_size, E820_RESERVED);
2399 -                               } else {
2400 -                                       limit_regions(mem_size);
2401 -                                       userdef=1;
2402 -                               }
2403 -                       }
2404 -               }
2405 -
2406 -               else if (!memcmp(from, "noexec=", 7))
2407 -                       noexec_setup(from + 7);
2408 +               mem_size = memparse(arg, &arg);
2409 +               limit_regions(mem_size);
2410 +               user_defined_memmap = 1;
2411 +       }
2412 +       return 0;
2413 +}
2414 +early_param("mem", parse_mem);
2415  
2416 +static int __init parse_memmap(char *arg)
2417 +{
2418 +       if (!arg)
2419 +               return -EINVAL;
2420  
2421 -#ifdef  CONFIG_X86_MPPARSE
2422 -               /*
2423 -                * If the BIOS enumerates physical processors before logical,
2424 -                * maxcpus=N at enumeration-time can be used to disable HT.
2425 +       if (strcmp(arg, "exactmap") == 0) {
2426 +#ifdef CONFIG_CRASH_DUMP
2427 +               /* If we are doing a crash dump, we
2428 +                * still need to know the real mem
2429 +                * size before original memory map is
2430 +                * reset.
2431                  */
2432 -               else if (!memcmp(from, "maxcpus=", 8)) {
2433 -                       extern unsigned int maxcpus;
2434 -
2435 -                       maxcpus = simple_strtoul(from + 8, NULL, 0);
2436 -               }
2437 +               find_max_pfn();
2438 +               saved_max_pfn = max_pfn;
2439  #endif
2440 +               e820.nr_map = 0;
2441 +               user_defined_memmap = 1;
2442 +       } else {
2443 +               /* If the user specifies memory size, we
2444 +                * limit the BIOS-provided memory map to
2445 +                * that size. exactmap can be used to specify
2446 +                * the exact map. mem=number can be used to
2447 +                * trim the existing memory map.
2448 +                */
2449 +               unsigned long long start_at, mem_size;
2450  
2451 -#ifdef CONFIG_ACPI
2452 -               /* "acpi=off" disables both ACPI table parsing and interpreter */
2453 -               else if (!memcmp(from, "acpi=off", 8)) {
2454 -                       disable_acpi();
2455 -               }
2456 -
2457 -               /* acpi=force to over-ride black-list */
2458 -               else if (!memcmp(from, "acpi=force", 10)) {
2459 -                       acpi_force = 1;
2460 -                       acpi_ht = 1;
2461 -                       acpi_disabled = 0;
2462 -               }
2463 -
2464 -               /* acpi=strict disables out-of-spec workarounds */
2465 -               else if (!memcmp(from, "acpi=strict", 11)) {
2466 -                       acpi_strict = 1;
2467 -               }
2468 -
2469 -               /* Limit ACPI just to boot-time to enable HT */
2470 -               else if (!memcmp(from, "acpi=ht", 7)) {
2471 -                       if (!acpi_force)
2472 -                               disable_acpi();
2473 -                       acpi_ht = 1;
2474 -               }
2475 -               
2476 -               /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
2477 -               else if (!memcmp(from, "pci=noacpi", 10)) {
2478 -                       acpi_disable_pci();
2479 -               }
2480 -               /* "acpi=noirq" disables ACPI interrupt routing */
2481 -               else if (!memcmp(from, "acpi=noirq", 10)) {
2482 -                       acpi_noirq_set();
2483 +               mem_size = memparse(arg, &arg);
2484 +               if (*arg == '@') {
2485 +                       start_at = memparse(arg+1, &arg);
2486 +                       add_memory_region(start_at, mem_size, E820_RAM);
2487 +               } else if (*arg == '#') {
2488 +                       start_at = memparse(arg+1, &arg);
2489 +                       add_memory_region(start_at, mem_size, E820_ACPI);
2490 +               } else if (*arg == '$') {
2491 +                       start_at = memparse(arg+1, &arg);
2492 +                       add_memory_region(start_at, mem_size, E820_RESERVED);
2493 +               } else {
2494 +                       limit_regions(mem_size);
2495 +                       user_defined_memmap = 1;
2496                 }
2497 +       }
2498 +       return 0;
2499 +}
2500 +early_param("memmap", parse_memmap);
2501  
2502 -               else if (!memcmp(from, "acpi_sci=edge", 13))
2503 -                       acpi_sci_flags.trigger =  1;
2504 -
2505 -               else if (!memcmp(from, "acpi_sci=level", 14))
2506 -                       acpi_sci_flags.trigger = 3;
2507 +#ifdef CONFIG_PROC_VMCORE
2508 +/* elfcorehdr= specifies the location of elf core header
2509 + * stored by the crashed kernel.
2510 + */
2511 +static int __init parse_elfcorehdr(char *arg)
2512 +{
2513 +       if (!arg)
2514 +               return -EINVAL;
2515  
2516 -               else if (!memcmp(from, "acpi_sci=high", 13))
2517 -                       acpi_sci_flags.polarity = 1;
2518 +       elfcorehdr_addr = memparse(arg, &arg);
2519 +       return 0;
2520 +}
2521 +early_param("elfcorehdr", parse_elfcorehdr);
2522 +#endif /* CONFIG_PROC_VMCORE */
2523  
2524 -               else if (!memcmp(from, "acpi_sci=low", 12))
2525 -                       acpi_sci_flags.polarity = 3;
2526 +/*
2527 + * highmem=size forces highmem to be exactly 'size' bytes.
2528 + * This works even on boxes that have no highmem otherwise.
2529 + * This also works to reduce highmem size on bigger boxes.
2530 + */
2531 +static int __init parse_highmem(char *arg)
2532 +{
2533 +       if (!arg)
2534 +               return -EINVAL;
2535  
2536 -#ifdef CONFIG_X86_IO_APIC
2537 -               else if (!memcmp(from, "acpi_skip_timer_override", 24))
2538 -                       acpi_skip_timer_override = 1;
2539 +       highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
2540 +       return 0;
2541 +}
2542 +early_param("highmem", parse_highmem);
2543  
2544 -               if (!memcmp(from, "disable_timer_pin_1", 19))
2545 -                       disable_timer_pin_1 = 1;
2546 -               if (!memcmp(from, "enable_timer_pin_1", 18))
2547 -                       disable_timer_pin_1 = -1;
2548 -
2549 -               /* disable IO-APIC */
2550 -               else if (!memcmp(from, "noapic", 6))
2551 -                       disable_ioapic_setup();
2552 -#endif /* CONFIG_X86_IO_APIC */
2553 -#endif /* CONFIG_ACPI */
2554 +/*
2555 + * vmalloc=size forces the vmalloc area to be exactly 'size'
2556 + * bytes. This can be used to increase (or decrease) the
2557 + * vmalloc area - the default is 128m.
2558 + */
2559 +static int __init parse_vmalloc(char *arg)
2560 +{
2561 +       if (!arg)
2562 +               return -EINVAL;
2563  
2564 -#ifdef CONFIG_X86_LOCAL_APIC
2565 -               /* enable local APIC */
2566 -               else if (!memcmp(from, "lapic", 5))
2567 -                       lapic_enable();
2568 -
2569 -               /* disable local APIC */
2570 -               else if (!memcmp(from, "nolapic", 6))
2571 -                       lapic_disable();
2572 -#endif /* CONFIG_X86_LOCAL_APIC */
2573 +       __VMALLOC_RESERVE = memparse(arg, &arg);
2574 +       return 0;
2575 +}
2576 +early_param("vmalloc", parse_vmalloc);
2577  
2578 -#ifdef CONFIG_KEXEC
2579 -               /* crashkernel=size@addr specifies the location to reserve for
2580 -                * a crash kernel.  By reserving this memory we guarantee
2581 -                * that linux never set's it up as a DMA target.
2582 -                * Useful for holding code to do something appropriate
2583 -                * after a kernel panic.
2584 -                */
2585 -               else if (!memcmp(from, "crashkernel=", 12)) {
2586  #ifndef CONFIG_XEN
2587 -                       unsigned long size, base;
2588 -                       size = memparse(from+12, &from);
2589 -                       if (*from == '@') {
2590 -                               base = memparse(from+1, &from);
2591 -                               /* FIXME: Do I want a sanity check
2592 -                                * to validate the memory range?
2593 -                                */
2594 -                               crashk_res.start = base;
2595 -                               crashk_res.end   = base + size - 1;
2596 -                       }
2597 -#else
2598 -                       printk("Ignoring crashkernel command line, "
2599 -                              "parameter will be supplied by xen\n");
2600 -#endif
2601 -               }
2602 -#endif
2603 -#ifdef CONFIG_PROC_VMCORE
2604 -               /* elfcorehdr= specifies the location of elf core header
2605 -                * stored by the crashed kernel.
2606 -                */
2607 -               else if (!memcmp(from, "elfcorehdr=", 11))
2608 -                       elfcorehdr_addr = memparse(from+11, &from);
2609 -#endif
2610 +/*
2611 + * reservetop=size reserves a hole at the top of the kernel address space which
2612 + * a hypervisor can load into later.  Needed for dynamically loaded hypervisors,
2613 + * so relocating the fixmap can be done before paging initialization.
2614 + */
2615 +static int __init parse_reservetop(char *arg)
2616 +{
2617 +       unsigned long address;
2618  
2619 -               /*
2620 -                * highmem=size forces highmem to be exactly 'size' bytes.
2621 -                * This works even on boxes that have no highmem otherwise.
2622 -                * This also works to reduce highmem size on bigger boxes.
2623 -                */
2624 -               else if (!memcmp(from, "highmem=", 8))
2625 -                       highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
2626 -       
2627 -               /*
2628 -                * vmalloc=size forces the vmalloc area to be exactly 'size'
2629 -                * bytes. This can be used to increase (or decrease) the
2630 -                * vmalloc area - the default is 128m.
2631 -                */
2632 -               else if (!memcmp(from, "vmalloc=", 8))
2633 -                       __VMALLOC_RESERVE = memparse(from+8, &from);
2634 +       if (!arg)
2635 +               return -EINVAL;
2636  
2637 -       next_char:
2638 -               c = *(from++);
2639 -               if (!c)
2640 -                       break;
2641 -               if (COMMAND_LINE_SIZE <= ++len)
2642 -                       break;
2643 -               *(to++) = c;
2644 -       }
2645 -       *to = '\0';
2646 -       *cmdline_p = command_line;
2647 -       if (userdef) {
2648 -               printk(KERN_INFO "user-defined physical RAM map:\n");
2649 -               print_memory_map("user");
2650 -       }
2651 +       address = memparse(arg, &arg);
2652 +       reserve_top_address(address);
2653 +       return 0;
2654  }
2655 +early_param("reservetop", parse_reservetop);
2656 +#endif
2657  
2658  /*
2659   * Callback for efi_memory_walk.
2660 @@ -1039,7 +924,7 @@ efi_find_max_pfn(unsigned long start, un
2661  static int __init
2662  efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
2663  {
2664 -       memory_present(0, start, end);
2665 +       memory_present(0, PFN_UP(start), PFN_DOWN(end));
2666         return 0;
2667  }
2668  
2669 @@ -1306,6 +1191,14 @@ static unsigned long __init setup_memory
2670         }
2671         printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
2672                 pages_to_mb(highend_pfn - highstart_pfn));
2673 +       num_physpages = highend_pfn;
2674 +       high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
2675 +#else
2676 +       num_physpages = max_low_pfn;
2677 +       high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
2678 +#endif
2679 +#ifdef CONFIG_FLATMEM
2680 +       max_mapnr = num_physpages;
2681  #endif
2682         printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
2683                         pages_to_mb(max_low_pfn));
2684 @@ -1317,22 +1210,21 @@ static unsigned long __init setup_memory
2685  
2686  void __init zone_sizes_init(void)
2687  {
2688 -       unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
2689 -       unsigned int max_dma, low;
2690 -
2691 -       max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
2692 -       low = max_low_pfn;
2693 -
2694 -       if (low < max_dma)
2695 -               zones_size[ZONE_DMA] = low;
2696 -       else {
2697 -               zones_size[ZONE_DMA] = max_dma;
2698 -               zones_size[ZONE_NORMAL] = low - max_dma;
2699 +       unsigned long max_zone_pfns[MAX_NR_ZONES];
2700 +       memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
2701 +       max_zone_pfns[ZONE_DMA] =
2702 +               virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
2703 +       max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
2704  #ifdef CONFIG_HIGHMEM
2705 -               zones_size[ZONE_HIGHMEM] = highend_pfn - low;
2706 +       max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
2707 +       add_active_range(0, 0, min(xen_start_info->nr_pages, highend_pfn));
2708 +       add_active_range(0, highend_pfn, highend_pfn);
2709 +#else
2710 +       add_active_range(0, 0, min(xen_start_info->nr_pages, max_low_pfn));
2711 +       add_active_range(0, max_low_pfn, max_low_pfn);
2712  #endif
2713 -       }
2714 -       free_area_init(zones_size);
2715 +
2716 +       free_area_init_nodes(max_zone_pfns);
2717  }
2718  #else
2719  extern unsigned long __init setup