From: www.kernel.org
Subject: Linux 2.6.21
Patch-mainline: 2.6.21

Automatically created from "patches.kernel.org/patch-2.6.21" by xen-port-patches.py

Acked-by: jbeulich@novell.com

--- sle11sp1-2010-03-29.orig/arch/x86/Kconfig   2010-02-09 16:48:15.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/Kconfig        2009-11-06 10:46:41.000000000 +0100
@@ -69,13 +69,15 @@ config GENERIC_CMOS_UPDATE
 
 config CLOCKSOURCE_WATCHDOG
        def_bool y
+       depends on !X86_XEN
 
 config GENERIC_CLOCKEVENTS
        def_bool y
+       depends on !X86_XEN
 
 config GENERIC_CLOCKEVENTS_BROADCAST
        def_bool y
-       depends on X86_64 || (X86_32 && X86_LOCAL_APIC)
+       depends on X86_64 || (X86_32 && X86_LOCAL_APIC && !X86_XEN)
 
 config LOCKDEP_SUPPORT
        def_bool y
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/Makefile   2009-11-06 10:45:48.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/Makefile        2009-11-06 10:46:41.000000000 +0100
@@ -138,7 +138,7 @@ ifeq ($(CONFIG_X86_64),y)
        pci-dma_64-$(CONFIG_XEN)        += pci-dma_32.o
 endif
 
-disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \
-       smpboot_$(BITS).o tsc_$(BITS).o
+disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
+       smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
 disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o
 %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/apic_32-xen.c      2009-11-06 10:45:48.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/apic_32-xen.c   2009-11-06 10:46:41.000000000 +0100
@@ -25,6 +25,8 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/cpu.h>
+#include <linux/clockchips.h>
+#include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
 
 #include <asm/atomic.h>
@@ -56,83 +58,26 @@ static cpumask_t timer_bcast_ipi;
  */
 
 /*
- * Debug level
+ * Debug level, exported for io_apic.c
  */
 int apic_verbosity;
 
 #ifndef CONFIG_XEN
 static int modern_apic(void)
 {
-       unsigned int lvr, version;
        /* AMD systems use old APIC versions, so check the CPU */
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-               boot_cpu_data.x86 >= 0xf)
+           boot_cpu_data.x86 >= 0xf)
                return 1;
-       lvr = apic_read(APIC_LVR);
-       version = GET_APIC_VERSION(lvr);
-       return version >= 0x14;
+       return lapic_get_version() >= 0x14;
 }
 #endif /* !CONFIG_XEN */
 
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
-       printk("unexpected IRQ trap at vector %02x\n", irq);
-       /*
-        * Currently unexpected vectors happen only on SMP and APIC.
-        * We _must_ ack these because every local APIC has only N
-        * irq slots per priority level, and a 'hanging, unacked' IRQ
-        * holds up an irq slot - in excessive cases (when multiple
-        * unexpected vectors occur) that might lock up the APIC
-        * completely.
-        * But only ack when the APIC is enabled -AK
-        */
-       if (cpu_has_apic)
-               ack_APIC_irq();
-}
-
 int get_physical_broadcast(void)
 {
         return 0xff;
 }
 
-#ifndef CONFIG_XEN
-#ifndef CONFIG_SMP
-static void up_apic_timer_interrupt_call(void)
-{
-       int cpu = smp_processor_id();
-
-       /*
-        * the NMI deadlock-detector uses this.
-        */
-       per_cpu(irq_stat, cpu).apic_timer_irqs++;
-
-       smp_local_timer_interrupt();
-}
-#endif
-
-void smp_send_timer_broadcast_ipi(void)
-{
-       cpumask_t mask;
-
-       cpus_and(mask, cpu_online_map, timer_bcast_ipi);
-       if (!cpus_empty(mask)) {
-#ifdef CONFIG_SMP
-               send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
-#else
-               /*
-                * We can directly call the apic timer interrupt handler
-                * in UP case. Minus all irq related functions
-                */
-               up_apic_timer_interrupt_call();
-#endif
-       }
-}
-#endif
-
 int setup_profiling_timer(unsigned int multiplier)
 {
        return -EINVAL;
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/cpu/common-xen.c   2009-11-06 10:46:27.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/cpu/common-xen.c        2009-11-06 10:46:41.000000000 +0100
@@ -612,7 +612,7 @@ void __init early_cpu_init(void)
 struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
 {
        memset(regs, 0, sizeof(struct pt_regs));
-       regs->xgs = __KERNEL_PDA;
+       regs->xfs = __KERNEL_PDA;
        return regs;
 }
 
@@ -669,12 +669,12 @@ struct i386_pda boot_pda = {
        .pcurrent = &init_task,
 };
 
-static inline void set_kernel_gs(void)
+static inline void set_kernel_fs(void)
 {
-       /* Set %gs for this CPU's PDA.  Memory clobber is to create a
+       /* Set %fs for this CPU's PDA.  Memory clobber is to create a
           barrier with respect to any PDA operations, so the compiler
           doesn't move any before here. */
-       asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
+       asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
 }
 
 /* Initialize the CPU's GDT and PDA.  The boot CPU does this for
@@ -732,7 +732,7 @@ void __cpuinit cpu_set_gdt(int cpu)
        }
        BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8));
 
-       set_kernel_gs();
+       set_kernel_fs();
 }
 
 /* Common CPU init for both boot and secondary CPUs */
@@ -777,8 +777,8 @@ static void __cpuinit _cpu_init(int cpu,
        __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
 #endif
 
-       /* Clear %fs. */
-       asm volatile ("mov %0, %%fs" : : "r" (0));
+       /* Clear %gs. */
+       asm volatile ("mov %0, %%gs" : : "r" (0));
 
        /* Clear all 6 debug registers: */
        set_debugreg(0, 0);
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/e820_32-xen.c      2009-11-06 10:46:27.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/e820_32-xen.c   2009-11-06 10:46:41.000000000 +0100
@@ -14,6 +14,7 @@
 #include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/e820.h>
+#include <asm/setup.h>
 #include <xen/interface/memory.h>
 
 #ifdef CONFIG_EFI
@@ -157,21 +158,22 @@ static struct resource standard_io_resou
        .flags  = IORESOURCE_BUSY | IORESOURCE_IO
 } };
 
-static int romsignature(const unsigned char *x)
+#define ROMSIGNATURE 0xaa55
+
+static int __init romsignature(const unsigned char *rom)
 {
        unsigned short sig;
-       int ret = 0;
-       if (probe_kernel_address((const unsigned short *)x, sig) == 0)
-               ret = (sig == 0xaa55);
-       return ret;
+
+       return probe_kernel_address((const unsigned short *)rom, sig) == 0 &&
+              sig == ROMSIGNATURE;
 }
 
 static int __init romchecksum(unsigned char *rom, unsigned long length)
 {
-       unsigned char *p, sum = 0;
+       unsigned char sum;
 
-       for (p = rom; p < rom + length; p++)
-               sum += *p;
+       for (sum = 0; length; length--)
+               sum += *rom++;
        return sum == 0;
 }
 
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/entry_32-xen.S     2009-11-06 10:46:27.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/entry_32-xen.S  2009-11-06 10:46:41.000000000 +0100
@@ -30,7 +30,7 @@
  *     18(%esp) - %eax
  *     1C(%esp) - %ds
  *     20(%esp) - %es
- *     24(%esp) - %gs
+ *     24(%esp) - %fs
  *     28(%esp) - orig_eax
  *     2C(%esp) - %eip
  *     30(%esp) - %cs
@@ -102,9 +102,9 @@ NMI_MASK    = 0x80000000
 
 #define SAVE_ALL \
        cld; \
-       pushl %gs; \
+       pushl %fs; \
        CFI_ADJUST_CFA_OFFSET 4;\
-       /*CFI_REL_OFFSET gs, 0;*/\
+       /*CFI_REL_OFFSET fs, 0;*/\
        pushl %es; \
        CFI_ADJUST_CFA_OFFSET 4;\
        /*CFI_REL_OFFSET es, 0;*/\
@@ -136,7 +136,7 @@ NMI_MASK    = 0x80000000
        movl %edx, %ds; \
        movl %edx, %es; \
        movl $(__KERNEL_PDA), %edx; \
-       movl %edx, %gs
+       movl %edx, %fs
 
 #define RESTORE_INT_REGS \
        popl %ebx;      \
@@ -169,9 +169,9 @@ NMI_MASK    = 0x80000000
2:     popl %es;       \
        CFI_ADJUST_CFA_OFFSET -4;\
        /*CFI_RESTORE es;*/\
-3:     popl %gs;       \
+3:     popl %fs;       \
        CFI_ADJUST_CFA_OFFSET -4;\
-       /*CFI_RESTORE gs;*/\
+       /*CFI_RESTORE fs;*/\
.pushsection .fixup,"ax";      \
4:     movl $0,(%esp); \
        jmp 1b;         \
@@ -230,6 +230,7 @@ ENTRY(ret_from_fork)
        CFI_ADJUST_CFA_OFFSET -4
        jmp syscall_exit
        CFI_ENDPROC
+END(ret_from_fork)
 
 /*
  * Return to user mode is not as complex as all this looks,
@@ -261,6 +262,7 @@ ENTRY(resume_userspace)
                                        # int/exception return?
        jne work_pending
        jmp restore_all
+END(ret_from_exception)
 
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
@@ -275,6 +277,7 @@ need_resched:
        jz restore_all
        call preempt_schedule_irq
        jmp need_resched
+END(resume_kernel)
 #endif
        CFI_ENDPROC
 
@@ -352,16 +355,17 @@ sysenter_past_esp:
        movl PT_OLDESP(%esp), %ecx
        xorl %ebp,%ebp
        TRACE_IRQS_ON
-1:     mov  PT_GS(%esp), %gs
+1:     mov  PT_FS(%esp), %fs
        ENABLE_INTERRUPTS_SYSEXIT
        CFI_ENDPROC
.pushsection .fixup,"ax"
-2:     movl $0,PT_GS(%esp)
+2:     movl $0,PT_FS(%esp)
        jmp 1b
.section __ex_table,"a"
        .align 4
        .long 1b,2b
.popsection
+ENDPROC(sysenter_entry)
 
        # pv sysenter call handler stub
 ENTRY(sysenter_entry_pv)
@@ -533,6 +537,7 @@ hypervisor_iret:
        jmp  hypercall_page + (__HYPERVISOR_iret * 32)
 #endif
        CFI_ENDPROC
+ENDPROC(system_call)
 
        # perform work that needs to be done immediately before resumption
        ALIGN
@@ -578,6 +583,7 @@ work_notifysig_v86:
        xorl %edx, %edx
        call do_notify_resume
        jmp resume_userspace_sig
+END(work_pending)
 
        # perform syscall exit tracing
        ALIGN
@@ -593,6 +599,7 @@ syscall_trace_entry:
        cmpl $(nr_syscalls), %eax
        jnae syscall_call
        jmp syscall_exit
+END(syscall_trace_entry)
 
        # perform syscall exit tracing
        ALIGN
@@ -606,6 +613,7 @@ syscall_exit_work:
        movl $1, %edx
        call do_syscall_trace
        jmp resume_userspace
+END(syscall_exit_work)
        CFI_ENDPROC
 
        RING0_INT_FRAME                 # can't unwind into user space anyway
@@ -616,16 +624,18 @@ syscall_fault:
        GET_THREAD_INFO(%ebp)
        movl $-EFAULT,PT_EAX(%esp)
        jmp resume_userspace
+END(syscall_fault)
 
syscall_badsys:
        movl $-ENOSYS,PT_EAX(%esp)
        jmp resume_userspace
+END(syscall_badsys)
        CFI_ENDPROC
 
 #ifndef CONFIG_XEN
 #define FIXUP_ESPFIX_STACK \
        /* since we are on a wrong stack, we cant make it a C code :( */ \
-       movl %gs:PDA_cpu, %ebx; \
+       movl %fs:PDA_cpu, %ebx; \
        PER_CPU(cpu_gdt_descr, %ebx); \
        movl GDS_address(%ebx), %ebx; \
        GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
@@ -656,9 +666,9 @@ syscall_badsys:
 ENTRY(interrupt)
.text
 
-vector=0
 ENTRY(irq_entries_start)
        RING0_INT_FRAME
+vector=0
.rept NR_IRQS
        ALIGN
 .if vector
@@ -667,11 +677,16 @@ ENTRY(irq_entries_start)
1:     pushl $~(vector)
        CFI_ADJUST_CFA_OFFSET 4
        jmp common_interrupt
-.data
+ .previous
        .long 1b
-.text
+ .text
vector=vector+1
.endr
+END(irq_entries_start)
+
+.previous
+END(interrupt)
+.previous
 
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
@@ -684,6 +699,7 @@ common_interrupt:
        movl %esp,%eax
        call do_IRQ
        jmp ret_from_intr
+ENDPROC(common_interrupt)
        CFI_ENDPROC
 
 #define BUILD_INTERRUPT(name, nr)      \
@@ -696,10 +712,16 @@ ENTRY(name)                               \
        movl %esp,%eax;                 \
        call smp_/**/name;              \
        jmp ret_from_intr;              \
-       CFI_ENDPROC
+       CFI_ENDPROC;                    \
+ENDPROC(name)
 
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
+
+/* This alternate entry is needed because we hijack the apic LVTT */
+#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
+BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
+#endif
 #else
 #define UNWIND_ESPFIX_STACK
 #endif
@@ -710,7 +732,7 @@ KPROBE_ENTRY(page_fault)
        CFI_ADJUST_CFA_OFFSET 4
        ALIGN
error_code:
-       /* the function address is in %gs's slot on the stack */
+       /* the function address is in %fs's slot on the stack */
        pushl %es
        CFI_ADJUST_CFA_OFFSET 4
        /*CFI_REL_OFFSET es, 0*/
@@ -739,20 +761,20 @@ error_code:
        CFI_ADJUST_CFA_OFFSET 4
        CFI_REL_OFFSET ebx, 0
        cld
-       pushl %gs
+       pushl %fs
        CFI_ADJUST_CFA_OFFSET 4
-       /*CFI_REL_OFFSET gs, 0*/
+       /*CFI_REL_OFFSET fs, 0*/
        movl $(__KERNEL_PDA), %ecx
-       movl %ecx, %gs
+       movl %ecx, %fs
        UNWIND_ESPFIX_STACK
        popl %ecx
        CFI_ADJUST_CFA_OFFSET -4
        /*CFI_REGISTER es, ecx*/
-       movl PT_GS(%esp), %edi          # get the function address
+       movl PT_FS(%esp), %edi          # get the function address
        movl PT_ORIG_EAX(%esp), %edx    # get the error code
        movl $-1, PT_ORIG_EAX(%esp)     # no syscall to restart
-       mov  %ecx, PT_GS(%esp)
-       /*CFI_REL_OFFSET gs, ES*/
+       mov  %ecx, PT_FS(%esp)
+       /*CFI_REL_OFFSET fs, ES*/
        movl $(__USER_DS), %ecx
        movl %ecx, %ds
        movl %ecx, %es
@@ -844,7 +866,7 @@ critical_fixup_table:
        .byte 6                         # pop  %eax
        .byte 7                         # pop  %ds
        .byte 8                         # pop  %es
-       .byte 9,9                       # pop  %gs
+       .byte 9,9                       # pop  %fs
        .byte 10,10,10                  # add  $4,%esp
        .byte 11                        # iret
        .byte -1,-1,-1,-1               # movb $1,1(%esi) = __DISABLE_INTERRUPTS
@@ -909,6 +931,7 @@ ENTRY(coprocessor_error)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(coprocessor_error)
 
 ENTRY(simd_coprocessor_error)
        RING0_INT_FRAME
@@ -918,6 +941,7 @@ ENTRY(simd_coprocessor_error)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(simd_coprocessor_error)
 
 ENTRY(device_not_available)
        RING0_INT_FRAME
@@ -940,6 +964,7 @@ device_available_emulate:
        call math_state_restore
        jmp ret_from_exception
        CFI_ENDPROC
+END(device_not_available)
 
 #ifndef CONFIG_XEN
 /*
@@ -1101,10 +1126,12 @@ ENTRY(native_iret)
        .align 4
        .long 1b,iret_exc
.previous
+END(native_iret)
 
 ENTRY(native_irq_enable_sysexit)
        sti
        sysexit
+END(native_irq_enable_sysexit)
 #endif
 
 KPROBE_ENTRY(int3)
@@ -1127,6 +1154,7 @@ ENTRY(overflow)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(overflow)
 
 ENTRY(bounds)
        RING0_INT_FRAME
@@ -1136,6 +1164,7 @@ ENTRY(bounds)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(bounds)
 
 ENTRY(invalid_op)
        RING0_INT_FRAME
@@ -1145,6 +1174,7 @@ ENTRY(invalid_op)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(invalid_op)
 
 ENTRY(coprocessor_segment_overrun)
        RING0_INT_FRAME
@@ -1154,6 +1184,7 @@ ENTRY(coprocessor_segment_overrun)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(coprocessor_segment_overrun)
 
 ENTRY(invalid_TSS)
        RING0_EC_FRAME
@@ -1161,6 +1192,7 @@ ENTRY(invalid_TSS)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(invalid_TSS)
 
 ENTRY(segment_not_present)
        RING0_EC_FRAME
@@ -1168,6 +1200,7 @@ ENTRY(segment_not_present)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(segment_not_present)
 
 ENTRY(stack_segment)
        RING0_EC_FRAME
@@ -1175,6 +1208,7 @@ ENTRY(stack_segment)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(stack_segment)
 
 KPROBE_ENTRY(general_protection)
        RING0_EC_FRAME
@@ -1190,6 +1224,7 @@ ENTRY(alignment_check)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(alignment_check)
 
 ENTRY(divide_error)
        RING0_INT_FRAME
@@ -1199,6 +1234,7 @@ ENTRY(divide_error)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(divide_error)
 
 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
@@ -1209,6 +1245,7 @@ ENTRY(machine_check)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(machine_check)
 #endif
 
 #ifndef CONFIG_XEN
@@ -1228,6 +1265,7 @@ ENTRY(fixup_4gb_segment)
        CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
        CFI_ENDPROC
+END(spurious_interrupt_bug)
 
 ENTRY(kernel_thread_helper)
        pushl $0                # fake return address for unwinder
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/head_32-xen.S      2009-11-06 10:46:27.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/head_32-xen.S   2009-11-06 10:46:41.000000000 +0100
@@ -27,6 +27,7 @@
 #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
 #define X86_VENDOR_ID  new_cpu_data+CPUINFO_x86_vendor_id
 
+.section .text.head,"ax",@progbits
 #define VIRT_ENTRY_OFFSET 0x0
.org VIRT_ENTRY_OFFSET
 ENTRY(startup_32)
@@ -60,11 +61,11 @@ ENTRY(startup_32)
 
        movb $1,X86_HARD_MATH
 
-       xorl %eax,%eax                  # Clear FS
-       movl %eax,%fs
+       xorl %eax,%eax          # Clear GS
+       movl %eax,%gs
 
        movl $(__KERNEL_PDA),%eax
-       mov  %eax,%gs
+       mov  %eax,%fs
 
        cld                     # gcc2 wants the direction flag cleared at all times
 
@@ -75,7 +76,7 @@ ENTRY(startup_32)
  * Point the GDT at this CPU's PDA.  This will be
  * cpu_gdt_table and boot_pda.
  */
-setup_pda:
+ENTRY(setup_pda)
        /* get the PDA pointer */
        movl $boot_pda, %eax
 
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/io_apic_32-xen.c   2009-11-06 10:46:27.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/io_apic_32-xen.c        2009-11-06 10:46:41.000000000 +0100
@@ -167,7 +167,7 @@ static inline void io_apic_write(unsigne
  */
 static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
-       volatile struct io_apic *io_apic = io_apic_base(apic);
+       volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
        if (sis_apic_bug)
                writel(reg, &io_apic->index);
        writel(value, &io_apic->data);
@@ -392,7 +392,7 @@ static void set_ioapic_affinity_irq(unsi
                        break;
                entry = irq_2_pin + entry->next;
        }
-       set_native_irq_info(irq, cpumask);
+       irq_desc[irq].affinity = cpumask;
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -531,8 +531,8 @@ static void do_irq_balance(void)
                package_index = CPU_TO_PACKAGEINDEX(i);
                for (j = 0; j < NR_IRQS; j++) {
                        unsigned long value_now, delta;
-                       /* Is this an active IRQ? */
-                       if (!irq_desc[j].action)
+                       /* Is this an active IRQ or balancing disabled ? */
+                       if (!irq_desc[j].action || irq_balancing_disabled(j))
                                continue;
                        if ( package_index == i )
                                IRQ_DELTA(package_index,j) = 0;
@@ -785,7 +785,7 @@ failed:
        return 0;
 }
 
-int __init irqbalance_disable(char *str)
+int __devinit irqbalance_disable(char *str)
 {
        irqbalance_disabled = 1;
        return 1;
@@ -1329,11 +1329,9 @@ static void ioapic_register_intr(int irq
                        trigger == IOAPIC_LEVEL)
                set_irq_chip_and_handler_name(irq, &ioapic_chip,
                                         handle_fasteoi_irq, "fasteoi");
-       else {
-               irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
+       else
                set_irq_chip_and_handler_name(irq, &ioapic_chip,
                                         handle_edge_irq, "edge");
-       }
        set_intr_gate(vector, interrupt[irq]);
 }
 #else
@@ -1407,7 +1405,6 @@ static void __init setup_IO_APIC_irqs(vo
                }
                spin_lock_irqsave(&ioapic_lock, flags);
                __ioapic_write_entry(apic, pin, entry);
-               set_native_irq_info(irq, TARGET_CPUS);
                spin_unlock_irqrestore(&ioapic_lock, flags);
        }
        }
@@ -1638,7 +1635,7 @@ void /*__init*/ print_local_APIC(void * 
        v = apic_read(APIC_LVR);
        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
        ver = GET_APIC_VERSION(v);
-       maxlvt = get_maxlvt();
+       maxlvt = lapic_get_maxlvt();
 
        v = apic_read(APIC_TASKPRI);
        printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
@@ -1976,7 +1973,7 @@ static void __init setup_ioapic_ids_from
 #endif
 
 #ifndef CONFIG_XEN
-static int no_timer_check __initdata;
+int no_timer_check __initdata;
 
 static int __init notimercheck(char *s)
 {
@@ -2369,7 +2366,7 @@ static inline void __init check_timer(vo
 
        disable_8259A_irq(0);
        set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
-                                     "fasteio");
+                                     "fasteoi");
        apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);   /* Fixed mode */
        enable_8259A_irq(0);
 
@@ -2662,7 +2659,7 @@ static void set_msi_irq_affinity(unsigne
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        write_msi_msg(irq, &msg);
-       set_native_irq_info(irq, mask);
+       irq_desc[irq].affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2681,25 +2678,32 @@ static struct irq_chip msi_chip = {
        .retrigger      = ioapic_retrigger_irq,
 };
 
-int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
        struct msi_msg msg;
-       int ret;
+       int irq, ret;
+       irq = create_irq();
+       if (irq < 0)
+               return irq;
+
+       set_irq_msi(irq, desc);
        ret = msi_compose_msg(dev, irq, &msg);
-       if (ret < 0)
+       if (ret < 0) {
+               destroy_irq(irq);
                return ret;
+       }
 
        write_msi_msg(irq, &msg);
 
        set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
                                      "edge");
 
-       return 0;
+       return irq;
 }
 
 void arch_teardown_msi_irq(unsigned int irq)
 {
-       return;
+       destroy_irq(irq);
 }
 
 #endif /* CONFIG_PCI_MSI */
@@ -2739,7 +2743,7 @@ static void set_ht_irq_affinity(unsigned
        dest = cpu_mask_to_apicid(mask);
 
        target_ht_irq(irq, dest);
-       set_native_irq_info(irq, mask);
+       irq_desc[irq].affinity = mask;
 }
 #endif
 
@@ -2947,7 +2951,6 @@ int io_apic_set_pci_routing (int ioapic,
 
        spin_lock_irqsave(&ioapic_lock, flags);
        __ioapic_write_entry(ioapic, pin, entry);
-       set_native_irq_info(irq, TARGET_CPUS);
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return 0;
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/irq_32-xen.c       2009-11-06 10:45:48.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/irq_32-xen.c    2009-11-06 10:46:41.000000000 +0100
@@ -10,7 +10,6 @@
  * io_apic.c.)
  */
 
-#include <asm/uaccess.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
@@ -19,19 +18,34 @@
 #include <linux/cpu.h>
 #include <linux/delay.h>
 
+#include <asm/apic.h>
+#include <asm/uaccess.h>
+
 DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
 EXPORT_PER_CPU_SYMBOL(irq_stat);
 
-#ifndef CONFIG_X86_LOCAL_APIC
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
  * each architecture has to answer this themselves.
  */
 void ack_bad_irq(unsigned int irq)
 {
-       printk("unexpected IRQ trap at vector %02x\n", irq);
-}
+       printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
+       /*
+        * Currently unexpected vectors happen only on SMP and APIC.
+        * We _must_ ack these because every local APIC has only N
+        * irq slots per priority level, and a 'hanging, unacked' IRQ
+        * holds up an irq slot - in excessive cases (when multiple
+        * unexpected vectors occur) that might lock up the APIC
+        * completely.
+        * But only ack when the APIC is enabled -AK
+        */
+       if (cpu_has_apic)
+               ack_APIC_irq();
 #endif
+}
 
 #ifdef CONFIG_4KSTACKS
 /*
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/microcode-xen.c    2009-11-06 10:46:27.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/microcode-xen.c 2009-11-06 10:46:41.000000000 +0100
@@ -108,7 +108,7 @@ static ssize_t microcode_write (struct f
        return ret;
 }
 
-static struct file_operations microcode_fops = {
+static const struct file_operations microcode_fops = {
        .owner          = THIS_MODULE,
        .write          = microcode_write,
        .open           = microcode_open,
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/mpparse_32-xen.c   2009-11-06 10:46:27.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/mpparse_32-xen.c        2009-11-06 10:46:41.000000000 +0100
@@ -1079,7 +1079,7 @@ int mp_register_gsi(u32 gsi, int trigger
        static int              gsi_to_irq[MAX_GSI_NUM];
 
        /* Don't set up the ACPI SCI because it's already set up */
-       if (acpi_fadt.sci_int == gsi)
+       if (acpi_gbl_FADT.sci_interrupt == gsi)
                return gsi;
 
        ioapic = mp_find_ioapic(gsi);
@@ -1136,7 +1136,7 @@ int mp_register_gsi(u32 gsi, int trigger
                        /*
                         * Don't assign IRQ used by ACPI SCI
                         */
-                       if (gsi == acpi_fadt.sci_int)
+                       if (gsi == acpi_gbl_FADT.sci_interrupt)
                                gsi = pci_irq++;
                        gsi_to_irq[irq] = gsi;
                } else {
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/pci-dma-xen.c      2009-11-06 10:46:27.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/pci-dma-xen.c   2009-11-06 10:46:41.000000000 +0100
@@ -308,7 +308,7 @@ int dma_declare_coherent_memory(struct d
        return DMA_MEMORY_IO;
 
 free1_out:
-       kfree(dev->dma_mem->bitmap);
+       kfree(dev->dma_mem);
 out:
        if (mem_base)
                iounmap(mem_base);
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/pcspeaker.c        2010-03-29 09:00:35.000000000 +0200
+++ sle11sp1-2010-03-29/arch/x86/kernel/pcspeaker.c     2009-11-06 10:46:41.000000000 +0100
@@ -6,6 +6,11 @@ static __init int add_pcspkr(void)
 {
        struct platform_device *pd;
 
+#ifdef CONFIG_XEN
+       if (!is_initial_xendomain())
+               return 0;
+#endif
+
        pd = platform_device_register_simple("pcspkr", -1, NULL, 0);
 
        return IS_ERR(pd) ? PTR_ERR(pd) : 0;
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/process_32-xen.c   2009-11-06 10:46:27.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/process_32-xen.c        2009-11-06 10:46:41.000000000 +0100
@@ -38,6 +38,7 @@
 #include <linux/ptrace.h>
 #include <linux/random.h>
 #include <linux/personality.h>
+#include <linux/tick.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -160,6 +161,7 @@ void cpu_idle(void)
 
        /* endless idle loop with no priority at all */
        while (1) {
+               tick_nohz_stop_sched_tick();
                while (!need_resched()) {
                        void (*idle)(void);
 
@@ -175,6 +177,7 @@ void cpu_idle(void)
                        __get_cpu_var(irq_stat).idle_timestamp = jiffies;
                        idle();
                }
+               tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
@@ -247,8 +250,8 @@ void show_regs(struct pt_regs * regs)
                regs->eax,regs->ebx,regs->ecx,regs->edx);
        printk("ESI: %08lx EDI: %08lx EBP: %08lx",
                regs->esi, regs->edi, regs->ebp);
-       printk(" DS: %04x ES: %04x GS: %04x\n",
-              0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs);
+       printk(" DS: %04x ES: %04x FS: %04x\n",
+              0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
 
        cr0 = read_cr0();
        cr2 = read_cr2();
@@ -279,7 +282,7 @@ int kernel_thread(int (*fn)(void *), voi
 
        regs.xds = __USER_DS;
        regs.xes = __USER_DS;
-       regs.xgs = __KERNEL_PDA;
+       regs.xfs = __KERNEL_PDA;
        regs.orig_eax = -1;
        regs.eip = (unsigned long) kernel_thread_helper;
        regs.xcs = __KERNEL_CS | get_kernel_rpl();
@@ -356,7 +359,7 @@ int copy_thread(int nr, unsigned long cl
 
        p->thread.eip = (unsigned long) ret_from_fork;
 
-       savesegment(fs,p->thread.fs);
+       savesegment(gs,p->thread.gs);
 
        tsk = current;
        if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -434,8 +437,8 @@ void dump_thread(struct pt_regs * regs, 
        dump->regs.eax = regs->eax;
        dump->regs.ds = regs->xds;
        dump->regs.es = regs->xes;
-       savesegment(fs,dump->regs.fs);
-       dump->regs.gs = regs->xgs;
+       dump->regs.fs = regs->xfs;
+       savesegment(gs,dump->regs.gs);
        dump->regs.orig_eax = regs->orig_eax;
        dump->regs.eip = regs->eip;
        dump->regs.cs = regs->xcs;
@@ -637,16 +640,6 @@ struct task_struct fastcall * __switch_t
                prefetch(&next->i387.fxsave);
 
        /*
-        * Restore %fs if needed.
-        *
-        * Glibc normally makes %fs be zero.
-        */
-       if (unlikely(next->fs))
-               loadsegment(fs, next->fs);
-
-       write_pda(pcurrent, next_p);
-
-       /*
         * Now maybe handle debug registers
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
@@ -654,6 +647,15 @@ struct task_struct fastcall * __switch_t
 
        disable_tsc(prev_p, next_p);
 
+       /*
+        * Leave lazy mode, flushing any hypercalls made here.
+        * This must be done before restoring TLS segments so
+        * the GDT and LDT are properly updated, and must be
+        * done before math_state_restore, so the TS bit is up
+        * to date.
+        */
+       arch_leave_lazy_cpu_mode();
+
        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
@@ -661,6 +663,14 @@ struct task_struct fastcall * __switch_t
        if (next_p->fpu_counter > 5)
                math_state_restore();
 
+       /*
+        * Restore %gs if needed (which is common)
+        */
+       if (prev->gs | next->gs)
+               loadsegment(gs, next->gs);
+
+       write_pda(pcurrent, next_p);
+
        return prev_p;
 }
 
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/setup_32-xen.c     2009-11-06 10:46:27.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/setup_32-xen.c  2009-11-06 10:46:41.000000000 +0100
@@ -33,7 +33,6 @@
 #include <linux/initrd.h>
 #include <linux/bootmem.h>
 #include <linux/seq_file.h>
-#include <linux/platform_device.h>
 #include <linux/console.h>
 #include <linux/mca.h>
 #include <linux/root_dev.h>
@@ -148,7 +147,7 @@ unsigned long saved_videomode;
 #define RAMDISK_PROMPT_FLAG            0x8000
 #define RAMDISK_LOAD_FLAG              0x4000  
 
-static char command_line[COMMAND_LINE_SIZE];
+static char __initdata command_line[COMMAND_LINE_SIZE];
 
 unsigned char __initdata boot_params[PARAM_SIZE];
 
@@ -649,8 +648,8 @@ void __init setup_arch(char **cmdline_p)
 
        if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
                i = COMMAND_LINE_SIZE;
-       memcpy(saved_command_line, xen_start_info->cmd_line, i);
-       saved_command_line[i - 1] = '\0';
+       memcpy(boot_command_line, xen_start_info->cmd_line, i);
+       boot_command_line[i - 1] = '\0';
        parse_early_param();
 
        if (user_defined_memmap) {
@@ -658,11 +657,19 @@ void __init setup_arch(char **cmdline_p)
                print_memory_map("user");
        }
 
-       strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
+       strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
        *cmdline_p = command_line;
 
        max_low_pfn = setup_memory();
 
+#ifdef CONFIG_VMI
+       /*
+        * Must be after max_low_pfn is determined, and before kernel
+        * pagetables are setup.
+        */
+       vmi_init();
+#endif
+
        /*
         * NOTE: before this point _nobody_ is allowed to allocate
         * any memory using the bootmem allocator.  Although the
@@ -825,7 +832,6 @@ void __init setup_arch(char **cmdline_p)
                conswitchp = &dummy_con;
 #endif
        }
-       tsc_init();
 }
 
 static int
@@ -835,31 +841,3 @@ xen_panic_event(struct notifier_block *t
        /* we're never actually going to get here... */
        return NOTIFY_DONE;
 }
-
-static __init int add_pcspkr(void)
-{
-       struct platform_device *pd;
-       int ret;
-
-       if (!is_initial_xendomain())
-               return 0;
-
-       pd = platform_device_alloc("pcspkr", -1);
-       if (!pd)
-               return -ENOMEM;
-
-       ret = platform_device_add(pd);
-       if (ret)
-               platform_device_put(pd);
-
-       return ret;
-}
-device_initcall(add_pcspkr);
-
-/*
- * Local Variables:
- * mode:c
- * c-file-style:"k&r"
- * c-basic-offset:8
- * End:
- */
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/smp_32-xen.c       2009-11-06 10:46:27.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/smp_32-xen.c    2009-11-06 10:46:41.000000000 +0100
@@ -335,8 +335,7 @@ static void flush_tlb_others(cpumask_t c
        /*
         * i'm not happy about this global shared spinlock in the
         * MM hot path, but we'll see how contended it is.
-        * Temporarily this turns IRQs off, so that lockups are
-        * detected by the NMI watchdog.
+        * AK: x86-64 has a faster method that could be ported.
         */
        spin_lock(&tlbstate_lock);
        
@@ -361,7 +360,7 @@ static void flush_tlb_others(cpumask_t c
 
        while (!cpus_empty(flush_cpumask))
                /* nothing. lockup detection does not belong here */
-               mb();
+               cpu_relax();
 
        flush_mm = NULL;
        flush_va = 0;
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/time-xen.c 2010-02-04 09:41:40.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/time-xen.c      2010-03-01 14:29:27.000000000 +0100
@@ -50,6 +50,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/posix-timers.h>
 #include <linux/cpufreq.h>
+#include <linux/clocksource.h>
 
 #include <asm/io.h>
 #include <asm/smp.h>
@@ -74,25 +75,17 @@
 #include <xen/evtchn.h>
 #include <xen/interface/vcpu.h>
 
-#if defined (__i386__)
-#include <asm/i8259.h>
+#ifdef CONFIG_X86_32
 #include <asm/i8253.h>
 DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
-#endif
-
-#define XEN_SHIFT 22
-
 int pit_latch_buggy;              /* extern */
-
-#if defined(__x86_64__)
-unsigned long vxtime_hz = PIT_TICK_RATE;
-struct vxtime_data __vxtime __section_vxtime;   /* for vsyscalls */
+#else
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
-struct timespec __xtime __section_xtime;
-struct timezone __sys_tz __section_sys_tz;
 #endif
 
+#define XEN_SHIFT 22
+
 unsigned int cpu_khz;  /* Detected as we calibrate the TSC */
 EXPORT_SYMBOL(cpu_khz);
 
@@ -112,9 +105,6 @@ static DEFINE_PER_CPU(struct shadow_time
 static struct timespec shadow_tv;
 static u32 shadow_tv_version;
 
-static struct timeval monotonic_tv;
-static spinlock_t monotonic_lock = SPIN_LOCK_UNLOCKED;
-
 /* Keep track of last time we did processing/updating of jiffies and xtime. */
 static u64 processed_system_time;   /* System time (ns) at last processing. */
 static DEFINE_PER_CPU(u64, processed_system_time);
@@ -209,7 +199,7 @@ static inline u64 scale_delta(u64 delta,
        return product;
 }
 
-void init_cpu_khz(void)
+static void init_cpu_khz(void)
 {
        u64 __cpu_khz = 1000000ULL << 32;
        struct vcpu_time_info *info = &vcpu_info(0)->time;
@@ -228,16 +218,6 @@ static u64 get_nsec_offset(struct shadow
        return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
 }
 
-#ifdef CONFIG_X86_64
-static unsigned long get_usec_offset(struct shadow_time_info *shadow)
-{
-       u64 now, delta;
-       rdtscll(now);
-       delta = now - shadow->tsc_timestamp;
-       return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
-}
-#endif
-
 static void __update_wallclock(time_t sec, long nsec)
 {
        long wtm_nsec, xtime_nsec;
@@ -350,142 +330,6 @@ void rtc_cmos_write(unsigned char val, u
 }
 EXPORT_SYMBOL(rtc_cmos_write);
 
-#ifdef CONFIG_X86_64
-
-/*
- * This version of gettimeofday has microsecond resolution
- * and better than microsecond precision on fast x86 machines with TSC.
- */
-void do_gettimeofday(struct timeval *tv)
-{
-       unsigned long seq;
-       unsigned long usec, sec;
-       unsigned long flags;
-       s64 nsec;
-       unsigned int cpu;
-       struct shadow_time_info *shadow;
-       u32 local_time_version;
-
-       cpu = get_cpu();
-       shadow = &per_cpu(shadow_time, cpu);
-
-       do {
-               local_time_version = shadow->version;
-               seq = read_seqbegin(&xtime_lock);
-
-               usec = get_usec_offset(shadow);
-
-               sec = xtime.tv_sec;
-               usec += (xtime.tv_nsec / NSEC_PER_USEC);
-
-               nsec = shadow->system_timestamp - processed_system_time;
-               __normalize_time(&sec, &nsec);
-               usec += (long)nsec / NSEC_PER_USEC;
-
-               if (unlikely(!time_values_up_to_date(cpu))) {
-                       /*
-                        * We may have blocked for a long time,
-                        * rendering our calculations invalid
-                        * (e.g. the time delta may have
-                        * overflowed). Detect that and recalculate
-                        * with fresh values.
-                        */
-                       get_time_values_from_xen(cpu);
-                       continue;
-               }
-       } while (read_seqretry(&xtime_lock, seq) ||
-                (local_time_version != shadow->version));
-
-       put_cpu();
-
-       while (usec >= USEC_PER_SEC) {
-               usec -= USEC_PER_SEC;
-               sec++;
-       }
-
-       spin_lock_irqsave(&monotonic_lock, flags);
-       if ((sec > monotonic_tv.tv_sec) ||
-           ((sec == monotonic_tv.tv_sec) && (usec > monotonic_tv.tv_usec)))
-       {
-               monotonic_tv.tv_sec = sec;
-               monotonic_tv.tv_usec = usec;
-       } else {
-               sec = monotonic_tv.tv_sec;
-               usec = monotonic_tv.tv_usec;
-       }
-       spin_unlock_irqrestore(&monotonic_lock, flags);
-
-       tv->tv_sec = sec;
-       tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
-       time_t sec;
-       s64 nsec;
-       unsigned int cpu;
-       struct shadow_time_info *shadow;
-       struct xen_platform_op op;
-
-       if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-               return -EINVAL;
-
-       if (!is_initial_xendomain() && !independent_wallclock)
-               return -EPERM;
-
-       cpu = get_cpu();
-       shadow = &per_cpu(shadow_time, cpu);
-
-       write_seqlock_irq(&xtime_lock);
-
-       /*
-        * Ensure we don't get blocked for a long time so that our time delta
-        * overflows. If that were to happen then our shadow time values would
-        * be stale, so we can retry with fresh ones.
-        */
-       for (;;) {
-               nsec = tv->tv_nsec - get_nsec_offset(shadow);
-               if (time_values_up_to_date(cpu))
-                       break;
-               get_time_values_from_xen(cpu);
-       }
-       sec = tv->tv_sec;
-       __normalize_time(&sec, &nsec);
-
-       if (is_initial_xendomain() && !independent_wallclock) {
-               op.cmd = XENPF_settime;
-               op.u.settime.secs        = sec;
-               op.u.settime.nsecs       = nsec;
-               op.u.settime.system_time = shadow->system_timestamp;
-               WARN_ON(HYPERVISOR_platform_op(&op));
-               update_wallclock();
-       } else if (independent_wallclock) {
-               nsec -= shadow->system_timestamp;
-               __normalize_time(&sec, &nsec);
-               __update_wallclock(sec, nsec);
-       }
-       ntp_clear();
-
-       /* Reset monotonic gettimeofday() timeval. */
-       spin_lock(&monotonic_lock);
-       monotonic_tv.tv_sec = 0;
-       monotonic_tv.tv_usec = 0;
-       spin_unlock(&monotonic_lock);
-
-       write_sequnlock_irq(&xtime_lock);
-
-       put_cpu();
-
-       clock_was_set();
-       return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
-#endif
-
 static void sync_xen_wallclock(unsigned long dummy);
 static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
 static void sync_xen_wallclock(unsigned long dummy)
@@ -534,15 +378,7 @@ static int set_rtc_mmss(unsigned long no
        return retval;
 }
 
-#ifdef CONFIG_X86_64
-/* monotonic_clock(): returns # of nanoseconds passed since time_init()
- *             Note: This function is required to return accurate
- *             time even in the absence of multiple timer ticks.
- */
-unsigned long long monotonic_clock(void)
-#else
 unsigned long long sched_clock(void)
-#endif
 {
        unsigned int cpu = get_cpu();
        struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
@@ -562,21 +398,18 @@ unsigned long long sched_clock(void)
 
        return time;
 }
-#ifdef CONFIG_X86_64
-EXPORT_SYMBOL(monotonic_clock);
-
-unsigned long long sched_clock(void)
-{
-       return monotonic_clock();
-}
-#endif
 
 unsigned long profile_pc(struct pt_regs *regs)
 {
        unsigned long pc = instruction_pointer(regs);
 
 #if defined(CONFIG_SMP) || defined(__x86_64__)
-       if (!user_mode_vm(regs) && in_lock_functions(pc)) {
+# ifdef __i386__
+       if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs)
+# else
+       if (!user_mode(regs)
+# endif
+           && in_lock_functions(pc)) {
 # ifdef CONFIG_FRAME_POINTER
 #  ifdef __i386__
                return ((unsigned long *)regs->ebp)[1];
@@ -585,14 +418,11 @@ unsigned long profile_pc(struct pt_regs 
 #  endif
 # else
 #  ifdef __i386__
-               unsigned long *sp;
-               if ((regs->xcs & 2) == 0)
-                       sp = (unsigned long *)&regs->esp;
-               else
-                       sp = (unsigned long *)regs->esp;
+               unsigned long *sp = (unsigned long *)&regs->esp;
 #  else
                unsigned long *sp = (unsigned long *)regs->rsp;
 #  endif
+
                /* Return address is either directly at stack pointer
                   or above a saved eflags. Eflags has bits 22-31 zero,
                   kernel addresses don't. */
@@ -746,19 +576,6 @@ irqreturn_t timer_interrupt(int irq, voi
        return IRQ_HANDLED;
 }
 
-#ifndef CONFIG_X86_64
-
-void tsc_init(void)
-{
-       init_cpu_khz();
-       printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
-              cpu_khz / 1000, cpu_khz % 1000);
-
-       use_tsc_delay();
-}
-
-#include <linux/clocksource.h>
-
 void mark_tsc_unstable(void)
 {
 #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */
@@ -814,21 +631,9 @@ static struct clocksource clocksource_xe
        .mask                   = CLOCKSOURCE_MASK(64),
        .mult                   = 1 << XEN_SHIFT,               /* time directly in nanoseconds */
        .shift                  = XEN_SHIFT,
-       .is_continuous          = 1,
+       .flags                  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static int __init init_xen_clocksource(void)
-{
-       clocksource_xen.mult = clocksource_khz2mult(cpu_khz,
-                                               clocksource_xen.shift);
-
-       return clocksource_register(&clocksource_xen);
-}
-
-module_init(init_xen_clocksource);
-
-#endif
-
 static void init_missing_ticks_accounting(unsigned int cpu)
 {
        struct vcpu_register_runstate_memory_area area;
@@ -849,7 +654,7 @@ static void init_missing_ticks_accountin
 }
 
 /* not static: needed by APM */
-unsigned long get_cmos_time(void)
+unsigned long read_persistent_clock(void)
 {
        unsigned long retval;
        unsigned long flags;
@@ -862,11 +667,11 @@ unsigned long get_cmos_time(void)
 
        return retval;
 }
-EXPORT_SYMBOL(get_cmos_time);
 
 static void sync_cmos_clock(unsigned long dummy);
 
 static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
+int no_sync_cmos_clock;
 
 static void sync_cmos_clock(unsigned long dummy)
 {
@@ -910,7 +715,8 @@ static void sync_cmos_clock(unsigned lon
 
 void notify_arch_cmos_timer(void)
 {
-       mod_timer(&sync_cmos_timer, jiffies + 1);
+       if (!no_sync_cmos_clock)
+               mod_timer(&sync_cmos_timer, jiffies + 1);
        mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
 }
 
@@ -943,29 +749,11 @@ static int time_init_device(void)
 
 device_initcall(time_init_device);
 
-#ifdef CONFIG_HPET_TIMER
 extern void (*late_time_init)(void);
-/* Duplicate of time_init() below, with hpet_enable part added */
-static void __init hpet_time_init(void)
-{
-       struct timespec ts;
-       ts.tv_sec = get_cmos_time();
-       ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
-
-       do_settimeofday(&ts);
-
-       if ((hpet_enable() >= 0) && hpet_use_timer) {
-               printk("Using HPET for base-timer\n");
-       }
-
-       do_time_init();
-}
-#endif
 
 /* Dynamically-mapped IRQ. */
 DEFINE_PER_CPU(int, timer_irq);
 
-extern void (*late_time_init)(void);
 static void setup_cpu0_timer_irq(void)
 {
        per_cpu(timer_irq, 0) =
@@ -973,7 +761,7 @@ static void setup_cpu0_timer_irq(void)
                        VIRQ_TIMER,
                        0,
                        timer_interrupt,
-                       IRQF_DISABLED|IRQF_TIMER,
+                       IRQF_DISABLED|IRQF_TIMER|IRQF_NOBALANCING,
                        "timer0",
                        NULL);
        BUG_ON(per_cpu(timer_irq, 0) < 0);
@@ -985,16 +773,9 @@ static struct vcpu_set_periodic_timer xe
 
 void __init time_init(void)
 {
-#ifdef CONFIG_HPET_TIMER
-       if (is_hpet_capable()) {
-               /*
-                * HPET initialization needs to do memory-mapped io. So, let
-                * us do a late initialization after mem_init().
-                */
-               late_time_init = hpet_time_init;
-               return;
-       }
-#endif
+       init_cpu_khz();
+       printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
+              cpu_khz / 1000, cpu_khz % 1000);
 
        switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0,
                                   &xen_set_periodic_tick)) {
@@ -1013,18 +794,12 @@ void __init time_init(void)
        per_cpu(processed_system_time, 0) = processed_system_time;
        init_missing_ticks_accounting(0);
 
-       update_wallclock();
+       clocksource_register(&clocksource_xen);
 
-#ifdef CONFIG_X86_64
-       init_cpu_khz();
-       printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
-              cpu_khz / 1000, cpu_khz % 1000);
+       update_wallclock();
 
-       vxtime.mode = VXTIME_TSC;
-       vxtime.quot = (1000000L << 32) / vxtime_hz;
-       vxtime.tsc_quot = (1000L << 32) / cpu_khz;
-       sync_core();
-       rdtscll(vxtime.last_tsc);
+#ifndef CONFIG_X86_64
+       use_tsc_delay();
 #endif
 
        /* Cannot request_irq() until kmem is initialised. */
@@ -1194,7 +969,7 @@ int __cpuinit local_setup_timer(unsigned
        irq = bind_virq_to_irqhandler(VIRQ_TIMER,
                                      cpu,
                                      timer_interrupt,
-                                     IRQF_DISABLED|IRQF_TIMER,
+                                     IRQF_DISABLED|IRQF_TIMER|IRQF_NOBALANCING,
                                      timer_name[cpu],
                                      NULL);
        if (irq < 0)
@@ -1283,7 +1058,7 @@ static ctl_table xen_table[] = {
 };
 static int __init xen_sysctl_init(void)
 {
-       (void)register_sysctl_table(xen_table, 0);
+       (void)register_sysctl_table(xen_table);
        return 0;
 }
 __initcall(xen_sysctl_init);
1563 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/traps_32-xen.c     2009-11-06 10:46:27.000000000 +0100
1564 +++ sle11sp1-2010-03-29/arch/x86/kernel/traps_32-xen.c  2009-11-06 10:46:41.000000000 +0100
1565 @@ -100,6 +100,7 @@ asmlinkage void fixup_4gb_segment(void);
1566  asmlinkage void machine_check(void);
1567  
1568  int kstack_depth_to_print = 24;
1569 +static unsigned int code_bytes = 64;
1570  ATOMIC_NOTIFIER_HEAD(i386die_chain);
1571  
1572  int register_die_notifier(struct notifier_block *nb)
1573 @@ -297,10 +298,11 @@ void show_registers(struct pt_regs *regs
1574         int i;
1575         int in_kernel = 1;
1576         unsigned long esp;
1577 -       unsigned short ss;
1578 +       unsigned short ss, gs;
1579  
1580         esp = (unsigned long) (&regs->esp);
1581         savesegment(ss, ss);
1582 +       savesegment(gs, gs);
1583         if (user_mode_vm(regs)) {
1584                 in_kernel = 0;
1585                 esp = regs->esp;
1586 @@ -319,8 +321,8 @@ void show_registers(struct pt_regs *regs
1587                 regs->eax, regs->ebx, regs->ecx, regs->edx);
1588         printk(KERN_EMERG "esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
1589                 regs->esi, regs->edi, regs->ebp, esp);
1590 -       printk(KERN_EMERG "ds: %04x   es: %04x   ss: %04x\n",
1591 -               regs->xds & 0xffff, regs->xes & 0xffff, ss);
1592 +       printk(KERN_EMERG "ds: %04x   es: %04x   fs: %04x  gs: %04x  ss: %04x\n",
1593 +              regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
1594         printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
1595                 TASK_COMM_LEN, current->comm, current->pid,
1596                 current_thread_info(), current, current->thread_info);
1597 @@ -330,7 +332,8 @@ void show_registers(struct pt_regs *regs
1598          */
1599         if (in_kernel) {
1600                 u8 *eip;
1601 -               int code_bytes = 64;
1602 +               unsigned int code_prologue = code_bytes * 43 / 64;
1603 +               unsigned int code_len = code_bytes;
1604                 unsigned char c;
1605  
1606                 printk("\n" KERN_EMERG "Stack: ");
1607 @@ -338,14 +341,14 @@ void show_registers(struct pt_regs *regs
1608  
1609                 printk(KERN_EMERG "Code: ");
1610  
1611 -               eip = (u8 *)regs->eip - 43;
1612 +               eip = (u8 *)regs->eip - code_prologue;
1613                 if (eip < (u8 *)PAGE_OFFSET ||
1614                         probe_kernel_address(eip, c)) {
1615                         /* try starting at EIP */
1616                         eip = (u8 *)regs->eip;
1617 -                       code_bytes = 32;
1618 +                       code_len = code_len - code_prologue + 1;
1619                 }
1620 -               for (i = 0; i < code_bytes; i++, eip++) {
1621 +               for (i = 0; i < code_len; i++, eip++) {
1622                         if (eip < (u8 *)PAGE_OFFSET ||
1623                                 probe_kernel_address(eip, c)) {
1624                                 printk(" Bad EIP value.");
1625 @@ -1134,3 +1137,13 @@ static int __init kstack_setup(char *s)
1626         return 1;
1627  }
1628  __setup("kstack=", kstack_setup);
1629 +
1630 +static int __init code_bytes_setup(char *s)
1631 +{
1632 +       code_bytes = simple_strtoul(s, NULL, 0);
1633 +       if (code_bytes > 8192)
1634 +               code_bytes = 8192;
1635 +
1636 +       return 1;
1637 +}
1638 +__setup("code_bytes=", code_bytes_setup);
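
The traps_32-xen.c changes above turn the hard-coded 43/64 split of the
oops "Code:" dump into a code_bytes= boot parameter, clamped to 8192.
A minimal userspace sketch of that bookkeeping (not the kernel code
itself):

#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
        /* mirrors code_bytes_setup(): parse, then clamp to 8192 */
        unsigned int code_bytes = argc > 1 ? strtoul(argv[1], NULL, 0) : 64;

        if (code_bytes > 8192)
                code_bytes = 8192;

        /* show_registers() dumps this many bytes before EIP */
        unsigned int code_prologue = code_bytes * 43 / 64;

        printf("dump %u bytes: %u before EIP, %u from EIP on\n",
               code_bytes, code_prologue, code_bytes - code_prologue);
        return 0;
}
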
1639 --- sle11sp1-2010-03-29.orig/arch/x86/mm/fault_32-xen.c 2009-11-06 10:46:27.000000000 +0100
1640 +++ sle11sp1-2010-03-29/arch/x86/mm/fault_32-xen.c      2009-11-06 10:46:41.000000000 +0100
1641 @@ -46,43 +46,17 @@ int unregister_page_fault_notifier(struc
1642  }
1643  EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
1644  
1645 -static inline int notify_page_fault(enum die_val val, const char *str,
1646 -                       struct pt_regs *regs, long err, int trap, int sig)
1647 +static inline int notify_page_fault(struct pt_regs *regs, long err)
1648  {
1649         struct die_args args = {
1650                 .regs = regs,
1651 -               .str = str,
1652 +               .str = "page fault",
1653                 .err = err,
1654 -               .trapnr = trap,
1655 -               .signr = sig
1656 +               .trapnr = 14,
1657 +               .signr = SIGSEGV
1658         };
1659 -       return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
1660 -}
1661 -
1662 -/*
1663 - * Unlock any spinlocks which will prevent us from getting the
1664 - * message out 
1665 - */
1666 -void bust_spinlocks(int yes)
1667 -{
1668 -       int loglevel_save = console_loglevel;
1669 -
1670 -       if (yes) {
1671 -               oops_in_progress = 1;
1672 -               return;
1673 -       }
1674 -#ifdef CONFIG_VT
1675 -       unblank_screen();
1676 -#endif
1677 -       oops_in_progress = 0;
1678 -       /*
1679 -        * OK, the message is on the console.  Now we call printk()
1680 -        * without oops_in_progress set so that printk will give klogd
1681 -        * a poke.  Hold onto your hats...
1682 -        */
1683 -       console_loglevel = 15;          /* NMI oopser may have shut the console up */
1684 -       printk(" ");
1685 -       console_loglevel = loglevel_save;
1686 +       return atomic_notifier_call_chain(&notify_page_fault_chain,
1687 +                                         DIE_PAGE_FAULT, &args);
1688  }
1689  
1690  /*
1691 @@ -476,8 +450,7 @@ fastcall void __kprobes do_page_fault(st
1692                 /* Can take a spurious fault if mapping changes R/O -> R/W. */
1693                 if (spurious_fault(regs, address, error_code))
1694                         return;
1695 -               if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
1696 -                                               SIGSEGV) == NOTIFY_STOP)
1697 +               if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
1698                         return;
1699                 /* 
1700                  * Don't take the mm semaphore here. If we fixup a prefetch
1701 @@ -486,8 +459,7 @@ fastcall void __kprobes do_page_fault(st
1702                 goto bad_area_nosemaphore;
1703         }
1704  
1705 -       if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
1706 -                                       SIGSEGV) == NOTIFY_STOP)
1707 +       if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
1708                 return;
1709  
1710         /* It's safe to allow irq's after cr2 has been saved and the vmalloc
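
Both do_page_fault() call sites above passed the same constants for the
string, trap number, and signal, which is why notify_page_fault() can be
collapsed to take just (regs, err).  A userspace model of that pattern,
with a plain callback array standing in for the kernel's
atomic_notifier_call_chain():

#include <stdio.h>

struct die_args { void *regs; const char *str; long err; int trapnr; int signr; };

typedef int (*notify_fn)(const struct die_args *);

static int notify_page_fault(notify_fn *chain, void *regs, long err)
{
        struct die_args args = {
                .regs = regs, .str = "page fault",
                .err = err, .trapnr = 14, .signr = 11 /* SIGSEGV */,
        };

        for (; *chain; chain++)
                if ((*chain)(&args))
                        return 1;       /* NOTIFY_STOP analogue */
        return 0;
}

static int log_fault(const struct die_args *a)
{
        printf("%s: err=%ld trap=%d\n", a->str, a->err, a->trapnr);
        return 0;
}

int main(void)
{
        notify_fn chain[] = { log_fault, NULL };
        return notify_page_fault(chain, NULL, 2);
}
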
1711 --- sle11sp1-2010-03-29.orig/arch/x86/mm/highmem_32-xen.c       2009-11-06 10:46:27.000000000 +0100
1712 +++ sle11sp1-2010-03-29/arch/x86/mm/highmem_32-xen.c    2009-11-06 10:46:41.000000000 +0100
1713 @@ -33,14 +33,16 @@ static void *__kmap_atomic(struct page *
1714  
1715         /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
1716         pagefault_disable();
1717 +
1718 +       idx = type + KM_TYPE_NR*smp_processor_id();
1719 +       BUG_ON(!pte_none(*(kmap_pte-idx)));
1720 +
1721         if (!PageHighMem(page))
1722                 return page_address(page);
1723  
1724 -       idx = type + KM_TYPE_NR*smp_processor_id();
1725         vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
1726 -       if (!pte_none(*(kmap_pte-idx)))
1727 -               BUG();
1728         set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
1729 +       /*arch_flush_lazy_mmu_mode();*/
1730  
1731         return (void*) vaddr;
1732  }
1733 @@ -94,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn,
1734         idx = type + KM_TYPE_NR*smp_processor_id();
1735         vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
1736         set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
1737 +       /*arch_flush_lazy_mmu_mode();*/
1738  
1739         return (void*) vaddr;
1740  }
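
In the highmem_32-xen.c hunk above, the per-CPU atomic-kmap slot index is
computed before the PageHighMem() test so the BUG_ON catches a reused
slot even when a lowmem page takes the early return.  Sketch of that slot
arithmetic (the fixmap constants are made up for illustration):

#include <stdio.h>

#define KM_TYPE_NR   16
#define PAGE_SIZE    4096UL
#define FIX_KMAP_TOP 0xfff00000UL       /* illustrative fixmap ceiling */

static unsigned long kmap_slot_vaddr(int type, int cpu)
{
        int idx = type + KM_TYPE_NR * cpu;      /* each CPU owns KM_TYPE_NR slots */

        /* fixmap indices grow downwards from the ceiling */
        return FIX_KMAP_TOP - (unsigned long)idx * PAGE_SIZE;
}

int main(void)
{
        printf("cpu0/type1 -> %#lx\n", kmap_slot_vaddr(1, 0));
        printf("cpu2/type1 -> %#lx\n", kmap_slot_vaddr(1, 2));
        return 0;
}
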
1741 --- sle11sp1-2010-03-29.orig/arch/x86/mm/init_32-xen.c  2009-11-06 10:46:27.000000000 +0100
1742 +++ sle11sp1-2010-03-29/arch/x86/mm/init_32-xen.c       2009-11-06 10:46:41.000000000 +0100
1743 @@ -66,6 +66,7 @@ static pmd_t * __init one_md_table_init(
1744                 
1745  #ifdef CONFIG_X86_PAE
1746         pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
1747 +       paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
1748         make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
1749         set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
1750         pud = pud_offset(pgd, 0);
1751 @@ -87,6 +88,7 @@ static pte_t * __init one_page_table_ini
1752  {
1753         if (pmd_none(*pmd)) {
1754                 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
1755 +               paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
1756                 make_lowmem_page_readonly(page_table,
1757                                           XENFEAT_writable_page_tables);
1758                 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
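
The init_32-xen.c hunks above call paravirt_alloc_pd()/paravirt_alloc_pt()
so the paravirt backend hears about a fresh page-table page before the
entry pointing at it is published.  A minimal model of that
notify-then-install ordering (the hook name and semantics are
illustrative, not the real pv-ops interface):

#include <stdio.h>

struct pv_ops {
        void (*alloc_pt)(unsigned long pfn);    /* backend may pin/track the page */
};

static void xen_alloc_pt(unsigned long pfn)
{
        printf("tracking page-table page at pfn %#lx\n", pfn);
}

static struct pv_ops pv = { .alloc_pt = xen_alloc_pt };

static void install_page_table(unsigned long pfn, unsigned long *pmd_slot)
{
        pv.alloc_pt(pfn);               /* 1. tell the backend first */
        *pmd_slot = (pfn << 12) | 0x7;  /* 2. then publish the entry */
}

int main(void)
{
        unsigned long pmd_slot = 0;

        install_page_table(0x1234, &pmd_slot);
        printf("pmd entry: %#lx\n", pmd_slot);
        return 0;
}
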
1759 --- sle11sp1-2010-03-29.orig/arch/x86/mm/pgtable_32-xen.c       2009-11-06 10:46:27.000000000 +0100
1760 +++ sle11sp1-2010-03-29/arch/x86/mm/pgtable_32-xen.c    2009-11-06 10:46:41.000000000 +0100
1761 @@ -149,6 +149,8 @@ void __set_fixmap (enum fixed_addresses 
1762  void __init reserve_top_address(unsigned long reserve)
1763  {
1764         BUG_ON(fixmaps > 0);
1765 +       printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
1766 +              (int)-reserve);
1767         __FIXADDR_TOP = -reserve - PAGE_SIZE;
1768         __VMALLOC_RESERVE += reserve;
1769  }
1770 @@ -258,6 +260,12 @@ void pgd_ctor(void *pgd, struct kmem_cac
1771                                 swapper_pg_dir + USER_PTRS_PER_PGD,
1772                                 KERNEL_PGD_PTRS);
1773                 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
1774 +
1775 +               /* must happen under lock */
1776 +               paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
1777 +                       __pa(swapper_pg_dir) >> PAGE_SHIFT,
1778 +                       USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
1779 +
1780                 pgd_list_add(pgd);
1781                 spin_unlock_irqrestore(&pgd_lock, flags);
1782         }
1783 @@ -268,6 +276,7 @@ void pgd_dtor(void *pgd, struct kmem_cac
1784  {
1785         unsigned long flags; /* can be called from interrupt context */
1786  
1787 +       paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
1788         spin_lock_irqsave(&pgd_lock, flags);
1789         pgd_list_del(pgd);
1790         spin_unlock_irqrestore(&pgd_lock, flags);
1791 @@ -292,6 +301,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
1792                         pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
1793                         if (!pmd)
1794                                 goto out_oom;
1795 +                       paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
1796                         set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
1797                 }
1798                 return pgd;
1799 @@ -314,6 +324,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
1800                 pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
1801                 if (!pmd[i])
1802                         goto out_oom;
1803 +               paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
1804         }
1805  
1806         spin_lock_irqsave(&pgd_lock, flags);
1807 @@ -354,12 +365,17 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
1808  
1809  out_oom:
1810         if (HAVE_SHARED_KERNEL_PMD) {
1811 -               for (i--; i >= 0; i--)
1812 -                       kmem_cache_free(pmd_cache,
1813 -                                       (void *)__va(pgd_val(pgd[i])-1));
1814 +               for (i--; i >= 0; i--) {
1815 +                       pgd_t pgdent = pgd[i];
1816 +                       void* pmd = (void *)__va(pgd_val(pgdent)-1);
1817 +                       paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
1818 +                       kmem_cache_free(pmd_cache, pmd);
1819 +               }
1820         } else {
1821 -               for (i--; i >= 0; i--)
1822 +               for (i--; i >= 0; i--) {
1823 +                       paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT);
1824                         kmem_cache_free(pmd_cache, pmd[i]);
1825 +               }
1826                 kfree(pmd);
1827         }
1828         kmem_cache_free(pgd_cache, pgd);
1829 @@ -383,7 +399,9 @@ void pgd_free(pgd_t *pgd)
1830         /* in the PAE case user pgd entries are overwritten before usage */
1831         if (PTRS_PER_PMD > 1) {
1832                 for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
1833 -                       pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
1834 +                       pgd_t pgdent = pgd[i];
1835 +                       void* pmd = (void *)__va(pgd_val(pgdent)-1);
1836 +                       paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
1837                         kmem_cache_free(pmd_cache, pmd);
1838                 }
1839  
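
Note how the out_oom and pgd_free() paths above now undo the
paravirt_alloc_pd() notification with paravirt_release_pd() before
handing the pmd back to the slab cache.  Sketch of that mirrored
alloc/unwind pattern, with userspace stand-ins for the hooks:

#include <stdio.h>
#include <stdlib.h>

static void hook_alloc(int i)   { printf("alloc hook %d\n", i); }
static void hook_release(int i) { printf("release hook %d\n", i); }

int main(void)
{
        void *pmd[4];
        int i;

        for (i = 0; i < 4; i++) {
                pmd[i] = (i == 2) ? NULL : malloc(64);  /* simulate OOM at i == 2 */
                if (!pmd[i])
                        goto out_oom;
                hook_alloc(i);
        }
        return 0;

out_oom:
        /* unwind in reverse: undo the hook before freeing the memory */
        for (i--; i >= 0; i--) {
                hook_release(i);
                free(pmd[i]);
        }
        return 1;
}
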
1840 --- sle11sp1-2010-03-29.orig/arch/x86/ia32/ia32entry-xen.S      2009-11-06 10:45:48.000000000 +0100
1841 +++ sle11sp1-2010-03-29/arch/x86/ia32/ia32entry-xen.S   2009-11-06 10:46:41.000000000 +0100
1842 @@ -465,7 +465,7 @@ ia32_sys_call_table:
1843         .quad sys32_vm86_warning        /* vm86old */ 
1844         .quad compat_sys_wait4
1845         .quad sys_swapoff               /* 115 */
1846 -       .quad sys32_sysinfo
1847 +       .quad compat_sys_sysinfo
1848         .quad sys32_ipc
1849         .quad sys_fsync
1850         .quad stub32_sigreturn
1851 @@ -510,7 +510,7 @@ ia32_sys_call_table:
1852         .quad sys_sched_yield
1853         .quad sys_sched_get_priority_max
1854         .quad sys_sched_get_priority_min  /* 160 */
1855 -       .quad sys_sched_rr_get_interval
1856 +       .quad sys32_sched_rr_get_interval
1857         .quad compat_sys_nanosleep
1858         .quad sys_mremap
1859         .quad sys_setresuid16
1860 @@ -668,4 +668,5 @@ ia32_sys_call_table:
1861         .quad compat_sys_vmsplice
1862         .quad compat_sys_move_pages
1863         .quad sys_getcpu
1864 +       .quad sys_epoll_pwait
1865  ia32_syscall_end:              
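
The syscall-table fixes above route 32-bit tasks to compat wrappers
because the native structures contain longs whose width differs under a
64-bit kernel.  An illustrative (heavily trimmed) layout comparison for
the sysinfo case:

#include <stdio.h>
#include <stdint.h>

/* trimmed-down stand-ins, not the full kernel definitions */
struct sysinfo_native { long uptime; unsigned long loads[3]; }; /* 8-byte longs on x86-64 */
struct sysinfo_compat { int32_t uptime; uint32_t loads[3]; };   /* what an ia32 task expects */

int main(void)
{
        printf("native: %zu bytes, compat: %zu bytes\n",
               sizeof(struct sysinfo_native), sizeof(struct sysinfo_compat));
        return 0;
}
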
1866 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/acpi/sleep_64-xen.c        2008-04-15 09:29:41.000000000 +0200
1867 +++ sle11sp1-2010-03-29/arch/x86/kernel/acpi/sleep_64-xen.c     2009-11-06 10:46:41.000000000 +0100
1868 @@ -59,7 +59,7 @@ unsigned long acpi_wakeup_address = 0;
1869  unsigned long acpi_video_flags;
1870  extern char wakeup_start, wakeup_end;
1871  
1872 -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
1873 +extern unsigned long acpi_copy_wakeup_routine(unsigned long);
1874  
1875  static pgd_t low_ptr;
1876  
1877 @@ -67,8 +67,10 @@ static void init_low_mapping(void)
1878  {
1879         pgd_t *slot0 = pgd_offset(current->mm, 0UL);
1880         low_ptr = *slot0;
1881 +       /* FIXME: We're playing with the current task's page tables here, which
1882 +        * is potentially dangerous on SMP systems.
1883 +        */
1884         set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET));
1885 -       WARN_ON(num_online_cpus() != 1);
1886         local_flush_tlb();
1887  }
1888  #endif
1889 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/e820_64-xen.c      2009-12-04 10:49:03.000000000 +0100
1890 +++ sle11sp1-2010-03-29/arch/x86/kernel/e820_64-xen.c   2009-12-04 10:52:06.000000000 +0100
1891 @@ -90,6 +90,13 @@ static inline int bad_addr(unsigned long
1892                 return 1;
1893         }
1894  
1895 +#ifdef CONFIG_NUMA
1896 +       /* NUMA memory to node map */
1897 +       if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) {
1898 +               *addrp = nodemap_addr + nodemap_size;
1899 +               return 1;
1900 +       }
1901 +#endif
1902         /* XXX ramdisk image here? */ 
1903  #else
1904         if (last < (table_end<<PAGE_SHIFT)) {
1905 @@ -215,6 +222,37 @@ unsigned long __init e820_end_of_ram(voi
1906  }
1907  
1908  /*
1909 + * Find the hole size in the range.
1910 + */
1911 +unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
1912 +{
1913 +       unsigned long ram = 0;
1914 +       int i;
1915 +
1916 +       for (i = 0; i < e820.nr_map; i++) {
1917 +               struct e820entry *ei = &e820.map[i];
1918 +               unsigned long last, addr;
1919 +
1920 +               if (ei->type != E820_RAM ||
1921 +                   ei->addr+ei->size <= start ||
1922 +                   ei->addr >= end)
1923 +                       continue;
1924 +
1925 +               addr = round_up(ei->addr, PAGE_SIZE);
1926 +               if (addr < start)
1927 +                       addr = start;
1928 +
1929 +               last = round_down(ei->addr + ei->size, PAGE_SIZE);
1930 +               if (last >= end)
1931 +                       last = end;
1932 +
1933 +               if (last > addr)
1934 +                       ram += last - addr;
1935 +       }
1936 +       return ((end - start) - ram);
1937 +}
1938 +
1939 +/*
1940   * Mark e820 reserved areas as busy for the resource manager.
1941   */
1942  void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
1943 @@ -751,7 +789,7 @@ static int __init parse_memmap_opt(char 
1944  }
1945  early_param("memmap", parse_memmap_opt);
1946  
1947 -void finish_e820_parsing(void)
1948 +void __init finish_e820_parsing(void)
1949  {
1950         if (userdef) {
1951                 printk(KERN_INFO "user-defined physical RAM map:\n");
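
The e820_hole_size() helper added above sums the page-aligned RAM inside
[start, end) and reports the remainder as hole.  Stand-alone model with
the same rounding rules (the map contents are made up for the example):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define E820_RAM  1

struct e820entry { unsigned long addr, size; int type; };

static unsigned long pg_up(unsigned long x)   { return (x + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); }
static unsigned long pg_down(unsigned long x) { return x & ~(PAGE_SIZE - 1); }

static unsigned long e820_hole_size(const struct e820entry *map, int n,
                                    unsigned long start, unsigned long end)
{
        unsigned long ram = 0;
        int i;

        for (i = 0; i < n; i++) {
                unsigned long addr, last;

                if (map[i].type != E820_RAM ||
                    map[i].addr + map[i].size <= start || map[i].addr >= end)
                        continue;

                addr = pg_up(map[i].addr);
                if (addr < start)
                        addr = start;
                last = pg_down(map[i].addr + map[i].size);
                if (last > end)
                        last = end;
                if (last > addr)
                        ram += last - addr;
        }
        return (end - start) - ram;
}

int main(void)
{
        struct e820entry map[] = {
                { 0x000000, 0x09f000, E820_RAM },       /* low RAM, gap above */
                { 0x100000, 0x700000, E820_RAM },
        };

        printf("hole in first 8 MiB: %#lx bytes\n",
               e820_hole_size(map, 2, 0, 0x800000));
        return 0;
}
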
1952 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/entry_64-xen.S     2009-11-06 10:46:27.000000000 +0100
1953 +++ sle11sp1-2010-03-29/arch/x86/kernel/entry_64-xen.S  2009-11-06 10:46:41.000000000 +0100
1954 @@ -629,6 +629,9 @@ END(invalidate_interrupt\num)
1955  ENTRY(call_function_interrupt)
1956         apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
1957  END(call_function_interrupt)
1958 +ENTRY(irq_move_cleanup_interrupt)
1959 +       apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
1960 +END(irq_move_cleanup_interrupt)
1961  #endif
1962  
1963  ENTRY(apic_timer_interrupt)
1964 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/head64-xen.c       2009-11-06 10:46:27.000000000 +0100
1965 +++ sle11sp1-2010-03-29/arch/x86/kernel/head64-xen.c    2009-11-06 10:46:41.000000000 +0100
1966 @@ -45,8 +45,6 @@ static void __init clear_bss(void)
1967  #define OLD_CL_BASE_ADDR        0x90000
1968  #define OLD_CL_OFFSET           0x90022
1969  
1970 -extern char saved_command_line[];
1971 -
1972  static void __init copy_bootdata(char *real_mode_data)
1973  {
1974  #ifndef CONFIG_XEN
1975 @@ -62,14 +60,14 @@ static void __init copy_bootdata(char *r
1976                 new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
1977         }
1978         command_line = (char *) ((u64)(new_data));
1979 -       memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
1980 +       memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
1981  #else
1982         int max_cmdline;
1983         
1984         if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
1985                 max_cmdline = COMMAND_LINE_SIZE;
1986 -       memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
1987 -       saved_command_line[max_cmdline-1] = '\0';
1988 +       memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline);
1989 +       boot_command_line[max_cmdline-1] = '\0';
1990  #endif
1991  }
1992  
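
The io_apic_64-xen.c diff that follows replaces the parallel
irq_vector[]/irq_domain[] arrays with a per-IRQ struct irq_cfg and turns
assign_irq_vector() into a 0/-errno function, with move_in_progress and
move_cleanup_count gating re-assignment while a vector migration is
pending.  A minimal userspace model of that bookkeeping (the allocator
stub stands in for the PHYSDEVOP_alloc_irq_vector hypercall):

#include <stdio.h>

#define NR_IRQS 32
#define EBUSY   16

struct irq_cfg {
        unsigned move_cleanup_count;
        unsigned char vector;
        unsigned char move_in_progress:1;
};

static struct irq_cfg irq_cfg[NR_IRQS];
static unsigned char next_vector = 0x20;        /* toy allocator state */

static int assign_irq_vector(int irq)
{
        struct irq_cfg *cfg = &irq_cfg[irq];

        if (cfg->move_in_progress || cfg->move_cleanup_count)
                return -EBUSY;          /* migration still pending */
        if (cfg->vector)
                return 0;               /* already assigned, nothing to do */
        cfg->vector = next_vector++;    /* stand-in for the real allocator */
        return 0;
}

int main(void)
{
        if (!assign_irq_vector(5))
                printf("irq 5 -> vector %#x\n", irq_cfg[5].vector);
        if (!assign_irq_vector(5))
                printf("second call keeps vector %#x\n", irq_cfg[5].vector);
        return 0;
}
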
1993 --- sle11sp1-2010-03-29.orig/arch/x86/kernel/io_apic_64-xen.c   2009-11-06 10:46:27.000000000 +0100
1994 +++ sle11sp1-2010-03-29/arch/x86/kernel/io_apic_64-xen.c        2009-11-06 10:46:41.000000000 +0100
1995 @@ -36,6 +36,7 @@
1996  #include <acpi/acpi_bus.h>
1997  #endif
1998  
1999 +#include <asm/idle.h>
2000  #include <asm/io.h>
2001  #include <asm/smp.h>
2002  #include <asm/desc.h>
2003 @@ -47,7 +48,20 @@
2004  #include <asm/msidef.h>
2005  #include <asm/hypertransport.h>
2006  
2007 -static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
2008 +struct irq_cfg {
2009 +#ifndef CONFIG_XEN
2010 +       cpumask_t domain;
2011 +       cpumask_t old_domain;
2012 +#endif
2013 +       unsigned move_cleanup_count;
2014 +       u8 vector;
2015 +       u8 move_in_progress : 1;
2016 +};
2017 +
2018 +/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
2019 +struct irq_cfg irq_cfg[NR_IRQS] __read_mostly;
2020 +
2021 +static int assign_irq_vector(int irq, cpumask_t mask);
2022  
2023  #define __apicdebuginit  __init
2024  
2025 @@ -89,7 +103,7 @@ int nr_ioapic_registers[MAX_IO_APICS];
2026   * Rough estimation of how many shared IRQs there are, can
2027   * be changed anytime.
2028   */
2029 -#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
2030 +#define MAX_PLUS_SHARED_IRQS NR_IRQS
2031  #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
2032  
2033  /*
2034 @@ -262,21 +276,19 @@ static void __target_IO_APIC_irq(unsigne
2035  
2036  static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
2037  {
2038 +       struct irq_cfg *cfg = irq_cfg + irq;
2039         unsigned long flags;
2040         unsigned int dest;
2041         cpumask_t tmp;
2042 -       int vector;
2043  
2044         cpus_and(tmp, mask, cpu_online_map);
2045         if (cpus_empty(tmp))
2046 -               tmp = TARGET_CPUS;
2047 -
2048 -       cpus_and(mask, tmp, CPU_MASK_ALL);
2049 +               return;
2050  
2051 -       vector = assign_irq_vector(irq, mask, &tmp);
2052 -       if (vector < 0)
2053 +       if (assign_irq_vector(irq, mask))
2054                 return;
2055  
2056 +       cpus_and(tmp, cfg->domain, mask);
2057         dest = cpu_mask_to_apicid(tmp);
2058  
2059         /*
2060 @@ -285,8 +297,8 @@ static void set_ioapic_affinity_irq(unsi
2061         dest = SET_APIC_LOGICAL_ID(dest);
2062  
2063         spin_lock_irqsave(&ioapic_lock, flags);
2064 -       __target_IO_APIC_irq(irq, dest, vector);
2065 -       set_native_irq_info(irq, mask);
2066 +       __target_IO_APIC_irq(irq, dest, cfg->vector);
2067 +       irq_desc[irq].affinity = mask;
2068         spin_unlock_irqrestore(&ioapic_lock, flags);
2069  }
2070  #endif
2071 @@ -332,11 +344,11 @@ static void add_pin_to_irq(unsigned int 
2072                 reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
2073                 reg ACTION;                                             \
2074                 io_apic_modify(entry->apic, reg);                       \
2075 +               FINAL;                                                  \
2076                 if (!entry->next)                                       \
2077                         break;                                          \
2078                 entry = irq_2_pin + entry->next;                        \
2079         }                                                               \
2080 -       FINAL;                                                          \
2081  }
2082  
2083  #define DO_ACTION(name,R,ACTION, FINAL)                                        \
2084 @@ -669,77 +681,62 @@ static int pin_2_irq(int idx, int apic, 
2085         return irq;
2086  }
2087  
2088 -static inline int IO_APIC_irq_trigger(int irq)
2089 -{
2090 -       int apic, idx, pin;
2091 -
2092 -       for (apic = 0; apic < nr_ioapics; apic++) {
2093 -               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
2094 -                       idx = find_irq_entry(apic,pin,mp_INT);
2095 -                       if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
2096 -                               return irq_trigger(idx);
2097 -               }
2098 -       }
2099 -       /*
2100 -        * nonexistent IRQs are edge default
2101 -        */
2102 -       return 0;
2103 -}
2104 -
2105 -/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
2106 -static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
2107 -
2108 -static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
2109 +static int __assign_irq_vector(int irq, cpumask_t mask)
2110  {
2111 -       int vector;
2112         struct physdev_irq irq_op;
2113 +       struct irq_cfg *cfg;
2114    
2115 -       BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
2116 +       BUG_ON((unsigned)irq >= NR_IRQS);
2117  
2118         if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
2119                 return -EINVAL;
2120  
2121 -       cpus_and(*result, mask, cpu_online_map);
2122 +       cfg = &irq_cfg[irq];
2123 +
2124 +       if ((cfg->move_in_progress) || cfg->move_cleanup_count)
2125 +               return -EBUSY;
2126  
2127 -       if (irq_vector[irq] > 0)
2128 -               return irq_vector[irq];
2129 +       if (cfg->vector)
2130 +               return 0;
2131  
2132         irq_op.irq = irq;
2133         if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
2134                 return -ENOSPC;
2135  
2136 -       vector = irq_op.vector;
2137 -       irq_vector[irq] = vector;
2138 +       cfg->vector = irq_op.vector;
2139  
2140 -       return vector;
2141 +       return 0;
2142  }
2143  
2144 -static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
2145 +static int assign_irq_vector(int irq, cpumask_t mask)
2146  {
2147 -       int vector;
2148 +       int err;
2149         unsigned long flags;
2150  
2151         spin_lock_irqsave(&vector_lock, flags);
2152 -       vector = __assign_irq_vector(irq, mask, result);
2153 +       err = __assign_irq_vector(irq, mask);
2154         spin_unlock_irqrestore(&vector_lock, flags);
2155 -       return vector;
2156 +       return err;
2157  }
2158  
2159  #ifndef CONFIG_XEN
2160  static void __clear_irq_vector(int irq)
2161  {
2162 +       struct irq_cfg *cfg;
2163         cpumask_t mask;
2164         int cpu, vector;
2165  
2166 -       BUG_ON(!irq_vector[irq]);
2167 +       BUG_ON((unsigned)irq >= NR_IRQS);
2168 +       cfg = &irq_cfg[irq];
2169 +       BUG_ON(!cfg->vector);
2170  
2171 -       vector = irq_vector[irq];
2172 -       cpus_and(mask, irq_domain[irq], cpu_online_map);
2173 +       vector = cfg->vector;
2174 +       cpus_and(mask, cfg->domain, cpu_online_map);
2175         for_each_cpu_mask(cpu, mask)
2176                 per_cpu(vector_irq, cpu)[vector] = -1;
2177  
2178 -       irq_vector[irq] = 0;
2179 -       irq_domain[irq] = CPU_MASK_NONE;
2180 +       cfg->vector = 0;
2181 +       cfg->domain = CPU_MASK_NONE;
2182  }
2183  
2184  void __setup_vector_irq(int cpu)
2185 @@ -749,10 +746,10 @@ void __setup_vector_irq(int cpu)
2186         int irq, vector;
2187  
2188         /* Mark the inuse vectors */
2189 -       for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
2190 -               if (!cpu_isset(cpu, irq_domain[irq]))
2191 +       for (irq = 0; irq < NR_IRQS; ++irq) {
2192 +               if (!cpu_isset(cpu, irq_cfg[irq].domain))
2193                         continue;
2194 -               vector = irq_vector[irq];
2195 +               vector = irq_cfg[irq].vector;
2196                 per_cpu(vector_irq, cpu)[vector] = irq;
2197         }
2198         /* Mark the free vectors */
2199 @@ -760,41 +757,49 @@ void __setup_vector_irq(int cpu)
2200                 irq = per_cpu(vector_irq, cpu)[vector];
2201                 if (irq < 0)
2202                         continue;
2203 -               if (!cpu_isset(cpu, irq_domain[irq]))
2204 +               if (!cpu_isset(cpu, irq_cfg[irq].domain))
2205                         per_cpu(vector_irq, cpu)[vector] = -1;
2206         }
2207  }
2208  
2209 -extern void (*interrupt[NR_IRQS])(void);
2210 -
2211  static struct irq_chip ioapic_chip;
2212  
2213 -#define IOAPIC_AUTO    -1
2214 -#define IOAPIC_EDGE    0
2215 -#define IOAPIC_LEVEL   1
2216 -
2217 -static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
2218 +static void ioapic_register_intr(int irq, unsigned long trigger)
2219  {
2220 -       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
2221 -                       trigger == IOAPIC_LEVEL)
2222 +       if (trigger)
2223                 set_irq_chip_and_handler_name(irq, &ioapic_chip,
2224                                               handle_fasteoi_irq, "fasteoi");
2225 -       else {
2226 -               irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
2227 +       else
2228                 set_irq_chip_and_handler_name(irq, &ioapic_chip,
2229                                               handle_edge_irq, "edge");
2230 -       }
2231  }
2232  #else
2233 -#define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
2234 +#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq)
2235  #endif /* !CONFIG_XEN */
2236  
2237 -static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
2238 +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
2239 +                             int trigger, int polarity)
2240  {
2241 +       struct irq_cfg *cfg = irq_cfg + irq;
2242         struct IO_APIC_route_entry entry;
2243 -       int vector;
2244 -       unsigned long flags;
2245 +       cpumask_t mask;
2246 +
2247 +       if (!IO_APIC_IRQ(irq))
2248 +               return;
2249  
2250 +       mask = TARGET_CPUS;
2251 +       if (assign_irq_vector(irq, mask))
2252 +               return;
2253 +
2254 +#ifndef CONFIG_XEN
2255 +       cpus_and(mask, cfg->domain, mask);
2256 +#endif
2257 +
2258 +       apic_printk(APIC_VERBOSE,KERN_DEBUG
2259 +                   "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
2260 +                   "IRQ %d Mode:%i Active:%i)\n",
2261 +                   apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
2262 +                   irq, trigger, polarity);
2263  
2264         /*
2265          * add it to the IO-APIC irq-routing table:
2266 @@ -803,41 +808,23 @@ static void __init setup_IO_APIC_irq(int
2267  
2268         entry.delivery_mode = INT_DELIVERY_MODE;
2269         entry.dest_mode = INT_DEST_MODE;
2270 +       entry.dest = cpu_mask_to_apicid(mask);
2271         entry.mask = 0;                         /* enable IRQ */
2272 -       entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
2273 -
2274 -       entry.trigger = irq_trigger(idx);
2275 -       entry.polarity = irq_polarity(idx);
2276 +       entry.trigger = trigger;
2277 +       entry.polarity = polarity;
2278 +       entry.vector = cfg->vector;
2279  
2280 -       if (irq_trigger(idx)) {
2281 -               entry.trigger = 1;
2282 +       /* Mask level triggered irqs.
2283 +        * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
2284 +        */
2285 +       if (trigger)
2286                 entry.mask = 1;
2287 -               entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
2288 -       }
2289 -
2290 -       if (/* !apic && */ !IO_APIC_IRQ(irq))
2291 -               return;
2292  
2293 -       if (IO_APIC_IRQ(irq)) {
2294 -               cpumask_t mask;
2295 -               vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
2296 -               if (vector < 0)
2297 -                       return;
2298 -
2299 -               entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
2300 -               entry.vector = vector;
2301 -
2302 -               ioapic_register_intr(irq, vector, IOAPIC_AUTO);
2303 -               if (!apic && (irq < 16))
2304 -                       disable_8259A_irq(irq);
2305 -       }
2306 +       ioapic_register_intr(irq, trigger);
2307 +       if (irq < 16)
2308 +               disable_8259A_irq(irq);
2309  
2310         ioapic_write_entry(apic, pin, entry);
2311 -
2312 -       spin_lock_irqsave(&ioapic_lock, flags);
2313 -       set_native_irq_info(irq, TARGET_CPUS);
2314 -       spin_unlock_irqrestore(&ioapic_lock, flags);
2315 -
2316  }
2317  
2318  static void __init setup_IO_APIC_irqs(void)
2319 @@ -862,8 +849,8 @@ static void __init setup_IO_APIC_irqs(vo
2320                 irq = pin_2_irq(idx, apic, pin);
2321                 add_pin_to_irq(irq, apic, pin);
2322  
2323 -               setup_IO_APIC_irq(apic, pin, idx, irq);
2324 -
2325 +               setup_IO_APIC_irq(apic, pin, irq,
2326 +                                 irq_trigger(idx), irq_polarity(idx));
2327         }
2328         }
2329  
2330 @@ -894,7 +881,7 @@ static void __init setup_ExtINT_IRQ0_pin
2331          */
2332         entry.dest_mode = INT_DEST_MODE;
2333         entry.mask = 0;                                 /* unmask IRQ now */
2334 -       entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
2335 +       entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
2336         entry.delivery_mode = INT_DELIVERY_MODE;
2337         entry.polarity = 0;
2338         entry.trigger = 0;
2339 @@ -994,18 +981,17 @@ void __apicdebuginit print_IO_APIC(void)
2340  
2341         printk(KERN_DEBUG ".... IRQ redirection table:\n");
2342  
2343 -       printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
2344 -                         " Stat Dest Deli Vect:   \n");
2345 +       printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
2346 +                         " Stat Dmod Deli Vect:   \n");
2347  
2348         for (i = 0; i <= reg_01.bits.entries; i++) {
2349                 struct IO_APIC_route_entry entry;
2350  
2351                 entry = ioapic_read_entry(apic, i);
2352  
2353 -               printk(KERN_DEBUG " %02x %03X %02X  ",
2354 +               printk(KERN_DEBUG " %02x %03X ",
2355                         i,
2356 -                       entry.dest.logical.logical_dest,
2357 -                       entry.dest.physical.physical_dest
2358 +                       entry.dest
2359                 );
2360  
2361                 printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
2362 @@ -1269,8 +1255,7 @@ void disable_IO_APIC(void)
2363                 entry.dest_mode       = 0; /* Physical */
2364                 entry.delivery_mode   = dest_ExtINT; /* ExtInt */
2365                 entry.vector          = 0;
2366 -               entry.dest.physical.physical_dest =
2367 -                                       GET_APIC_ID(apic_read(APIC_ID));
2368 +               entry.dest          = GET_APIC_ID(apic_read(APIC_ID));
2369  
2370                 /*
2371                  * Add it to the IO-APIC irq-routing table:
2372 @@ -1355,16 +1340,15 @@ static unsigned int startup_ioapic_irq(u
2373  
2374  static int ioapic_retrigger_irq(unsigned int irq)
2375  {
2376 +       struct irq_cfg *cfg = &irq_cfg[irq];
2377         cpumask_t mask;
2378 -       unsigned vector;
2379         unsigned long flags;
2380  
2381         spin_lock_irqsave(&vector_lock, flags);
2382 -       vector = irq_vector[irq];
2383         cpus_clear(mask);
2384 -       cpu_set(first_cpu(irq_domain[irq]), mask);
2385 +       cpu_set(first_cpu(cfg->domain), mask);
2386  
2387 -       send_IPI_mask(mask, vector);
2388 +       send_IPI_mask(mask, cfg->vector);
2389         spin_unlock_irqrestore(&vector_lock, flags);
2390  
2391         return 1;
2392 @@ -1379,8 +1363,68 @@ static int ioapic_retrigger_irq(unsigned
2393   * races.
2394   */
2395  
2396 +#ifdef CONFIG_SMP
2397 +asmlinkage void smp_irq_move_cleanup_interrupt(void)
2398 +{
2399 +       unsigned vector, me;
2400 +       ack_APIC_irq();
2401 +       exit_idle();
2402 +       irq_enter();
2403 +
2404 +       me = smp_processor_id();
2405 +       for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
2406 +               unsigned int irq;
2407 +               struct irq_desc *desc;
2408 +               struct irq_cfg *cfg;
2409 +               irq = __get_cpu_var(vector_irq)[vector];
2410 +               if (irq >= NR_IRQS)
2411 +                       continue;
2412 +
2413 +               desc = irq_desc + irq;
2414 +               cfg = irq_cfg + irq;
2415 +               spin_lock(&desc->lock);
2416 +               if (!cfg->move_cleanup_count)
2417 +                       goto unlock;
2418 +
2419 +               if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
2420 +                       goto unlock;
2421 +
2422 +               __get_cpu_var(vector_irq)[vector] = -1;
2423 +               cfg->move_cleanup_count--;
2424 +unlock:
2425 +               spin_unlock(&desc->lock);
2426 +       }
2427 +
2428 +       irq_exit();
2429 +}
2430 +
2431 +static void irq_complete_move(unsigned int irq)
2432 +{
2433 +       struct irq_cfg *cfg = irq_cfg + irq;
2434 +       unsigned vector, me;
2435 +
2436 +       if (likely(!cfg->move_in_progress))
2437 +               return;
2438 +
2439 +       vector = ~get_irq_regs()->orig_rax;
2440 +       me = smp_processor_id();
2441 +       if ((vector == cfg->vector) &&
2442 +           cpu_isset(smp_processor_id(), cfg->domain)) {
2443 +               cpumask_t cleanup_mask;
2444 +
2445 +               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2446 +               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2447 +               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2448 +               cfg->move_in_progress = 0;
2449 +       }
2450 +}
2451 +#else
2452 +static inline void irq_complete_move(unsigned int irq) {}
2453 +#endif
2454 +
2455  static void ack_apic_edge(unsigned int irq)
2456  {
2457 +       irq_complete_move(irq);
2458         move_native_irq(irq);
2459         ack_APIC_irq();
2460  }
2461 @@ -1389,6 +1433,7 @@ static void ack_apic_level(unsigned int 
2462  {
2463         int do_unmask_irq = 0;
2464  
2465 +       irq_complete_move(irq);
2466  #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
2467         /* If we are moving the irq we need to mask it */
2468         if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
2469 @@ -1440,7 +1485,7 @@ static inline void init_IO_APIC_traps(vo
2470          */
2471         for (irq = 0; irq < NR_IRQS ; irq++) {
2472                 int tmp = irq;
2473 -               if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
2474 +               if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) {
2475                         /*
2476                          * Hmm.. We don't have an entry for this,
2477                          * so default to an old-fashioned 8259
2478 @@ -1538,7 +1583,7 @@ static inline void unlock_ExtINT_logic(v
2479  
2480         entry1.dest_mode = 0;                   /* physical delivery */
2481         entry1.mask = 0;                        /* unmask IRQ now */
2482 -       entry1.dest.physical.physical_dest = hard_smp_processor_id();
2483 +       entry1.dest = hard_smp_processor_id();
2484         entry1.delivery_mode = dest_ExtINT;
2485         entry1.polarity = entry0.polarity;
2486         entry1.trigger = 0;
2487 @@ -1582,15 +1627,14 @@ static inline void unlock_ExtINT_logic(v
2488   */
2489  static inline void check_timer(void)
2490  {
2491 +       struct irq_cfg *cfg = irq_cfg + 0;
2492         int apic1, pin1, apic2, pin2;
2493 -       int vector;
2494 -       cpumask_t mask;
2495  
2496         /*
2497          * get/set the timer IRQ vector:
2498          */
2499         disable_8259A_irq(0);
2500 -       vector = assign_irq_vector(0, TARGET_CPUS, &mask);
2501 +       assign_irq_vector(0, TARGET_CPUS);
2502  
2503         /*
2504          * Subtle, code in do_timer_interrupt() expects an AEOI
2505 @@ -1610,7 +1654,7 @@ static inline void check_timer(void)
2506         apic2 = ioapic_i8259.apic;
2507  
2508         apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
2509 -               vector, apic1, pin1, apic2, pin2);
2510 +               cfg->vector, apic1, pin1, apic2, pin2);
2511  
2512         if (pin1 != -1) {
2513                 /*
2514 @@ -1641,7 +1685,7 @@ static inline void check_timer(void)
2515                 /*
2516                  * legacy devices should be connected to IO APIC #0
2517                  */
2518 -               setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
2519 +               setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector);
2520                 if (timer_irq_works()) {
2521                         apic_printk(APIC_VERBOSE," works.\n");
2522                         nmi_watchdog_default();
2523 @@ -1666,14 +1710,14 @@ static inline void check_timer(void)
2524  
2525         disable_8259A_irq(0);
2526         irq_desc[0].chip = &lapic_irq_type;
2527 -       apic_write(APIC_LVT0, APIC_DM_FIXED | vector);  /* Fixed mode */
2528 +       apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);     /* Fixed mode */
2529         enable_8259A_irq(0);
2530  
2531         if (timer_irq_works()) {
2532                 apic_printk(APIC_VERBOSE," works.\n");
2533                 return;
2534         }
2535 -       apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
2536 +       apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
2537         apic_printk(APIC_VERBOSE," failed.\n");
2538  
2539         apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");
2540 @@ -1828,19 +1872,16 @@ int create_irq(void)
2541         /* Allocate an unused irq */
2542         int irq;
2543         int new;
2544 -       int vector = 0;
2545         unsigned long flags;
2546 -       cpumask_t mask;
2547  
2548         irq = -ENOSPC;
2549         spin_lock_irqsave(&vector_lock, flags);
2550         for (new = (NR_IRQS - 1); new >= 0; new--) {
2551                 if (platform_legacy_irq(new))
2552                         continue;
2553 -               if (irq_vector[new] != 0)
2554 +               if (irq_cfg[new].vector != 0)
2555                         continue;
2556 -               vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
2557 -               if (likely(vector > 0))
2558 +               if (__assign_irq_vector(new, TARGET_CPUS) == 0)
2559                         irq = new;
2560                 break;
2561         }
2562 @@ -1871,12 +1912,15 @@ void destroy_irq(unsigned int irq)
2563  #if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
2564  static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
2565  {
2566 -       int vector;
2567 +       struct irq_cfg *cfg = irq_cfg + irq;
2568 +       int err;
2569         unsigned dest;
2570         cpumask_t tmp;
2571  
2572 -       vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
2573 -       if (vector >= 0) {
2574 +       tmp = TARGET_CPUS;
2575 +       err = assign_irq_vector(irq, tmp);
2576 +       if (!err) {
2577 +               cpus_and(tmp, cfg->domain, tmp);
2578                 dest = cpu_mask_to_apicid(tmp);
2579  
2580                 msg->address_hi = MSI_ADDR_BASE_HI;
2581 @@ -1896,40 +1940,38 @@ static int msi_compose_msg(struct pci_de
2582                         ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2583                                 MSI_DATA_DELIVERY_FIXED:
2584                                 MSI_DATA_DELIVERY_LOWPRI) |
2585 -                       MSI_DATA_VECTOR(vector);
2586 +                       MSI_DATA_VECTOR(cfg->vector);
2587         }
2588 -       return vector;
2589 +       return err;
2590  }
2591  
2592  #ifdef CONFIG_SMP
2593  static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2594  {
2595 +       struct irq_cfg *cfg = irq_cfg + irq;
2596         struct msi_msg msg;
2597         unsigned int dest;
2598         cpumask_t tmp;
2599 -       int vector;
2600  
2601         cpus_and(tmp, mask, cpu_online_map);
2602         if (cpus_empty(tmp))
2603 -               tmp = TARGET_CPUS;
2604 -
2605 -       cpus_and(mask, tmp, CPU_MASK_ALL);
2606 +               return;
2607  
2608 -       vector = assign_irq_vector(irq, mask, &tmp);
2609 -       if (vector < 0)
2610 +       if (assign_irq_vector(irq, mask))
2611                 return;
2612  
2613 +       cpus_and(tmp, cfg->domain, mask);
2614         dest = cpu_mask_to_apicid(tmp);
2615  
2616         read_msi_msg(irq, &msg);
2617  
2618         msg.data &= ~MSI_DATA_VECTOR_MASK;
2619 -       msg.data |= MSI_DATA_VECTOR(vector);
2620 +       msg.data |= MSI_DATA_VECTOR(cfg->vector);
2621         msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
2622         msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2623  
2624         write_msi_msg(irq, &msg);
2625 -       set_native_irq_info(irq, mask);
2626 +       irq_desc[irq].affinity = mask;
2627  }
2628  #endif /* CONFIG_SMP */
2629  
2630 @@ -1948,24 +1990,31 @@ static struct irq_chip msi_chip = {
2631         .retrigger      = ioapic_retrigger_irq,
2632  };
2633  
2634 -int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
2635 +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2636  {
2637         struct msi_msg msg;
2638 -       int ret;
2639 +       int irq, ret;
2640 +       irq = create_irq();
2641 +       if (irq < 0)
2642 +               return irq;
2643 +
2644 +       set_irq_msi(irq, desc);
2645         ret = msi_compose_msg(dev, irq, &msg);
2646 -       if (ret < 0)
2647 +       if (ret < 0) {
2648 +               destroy_irq(irq);
2649                 return ret;
2650 +       }
2651  
2652         write_msi_msg(irq, &msg);
2653  
2654         set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2655  
2656 -       return 0;
2657 +       return irq;
2658  }
2659  
2660  void arch_teardown_msi_irq(unsigned int irq)
2661  {
2662 -       return;
2663 +       destroy_irq(irq);
2664  }
2665  
2666  #endif /* CONFIG_PCI_MSI */
2667 @@ -1993,24 +2042,22 @@ static void target_ht_irq(unsigned int i
2668  
2669  static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2670  {
2671 +       struct irq_cfg *cfg = irq_cfg + irq;
2672         unsigned int dest;
2673         cpumask_t tmp;
2674 -       int vector;
2675  
2676         cpus_and(tmp, mask, cpu_online_map);
2677         if (cpus_empty(tmp))
2678 -               tmp = TARGET_CPUS;
2679 -
2680 -       cpus_and(mask, tmp, CPU_MASK_ALL);
2681 +               return;
2682  
2683 -       vector = assign_irq_vector(irq, mask, &tmp);
2684 -       if (vector < 0)
2685 +       if (assign_irq_vector(irq, mask))
2686                 return;
2687  
2688 +       cpus_and(tmp, cfg->domain, mask);
2689         dest = cpu_mask_to_apicid(tmp);
2690  
2691 -       target_ht_irq(irq, dest, vector);
2692 -       set_native_irq_info(irq, mask);
2693 +       target_ht_irq(irq, dest, cfg->vector);
2694 +       irq_desc[irq].affinity = mask;
2695  }
2696  #endif
2697  
2698 @@ -2027,14 +2074,17 @@ static struct irq_chip ht_irq_chip = {
2699  
2700  int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2701  {
2702 -       int vector;
2703 +       struct irq_cfg *cfg = irq_cfg + irq;
2704 +       int err;
2705         cpumask_t tmp;
2706  
2707 -       vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
2708 -       if (vector >= 0) {
2709 +       tmp = TARGET_CPUS;
2710 +       err = assign_irq_vector(irq, tmp);
2711 +       if (!err) {
2712                 struct ht_irq_msg msg;
2713                 unsigned dest;
2714  
2715 +               cpus_and(tmp, cfg->domain, tmp);
2716                 dest = cpu_mask_to_apicid(tmp);
2717  
2718                 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
2719 @@ -2042,7 +2092,7 @@ int arch_setup_ht_irq(unsigned int irq, 
2720                 msg.address_lo =
2721                         HT_IRQ_LOW_BASE |
2722                         HT_IRQ_LOW_DEST_ID(dest) |
2723 -                       HT_IRQ_LOW_VECTOR(vector) |
2724 +                       HT_IRQ_LOW_VECTOR(cfg->vector) |
2725                         ((INT_DEST_MODE == 0) ?
2726                                 HT_IRQ_LOW_DM_PHYSICAL :
2727                                 HT_IRQ_LOW_DM_LOGICAL) |
2728 @@ -2057,7 +2107,7 @@ int arch_setup_ht_irq(unsigned int irq, 
2729                 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
2730                                               handle_edge_irq, "edge");
2731         }
2732 -       return vector;
2733 +       return err;
2734  }
2735  #endif /* CONFIG_HT_IRQ */
2736  
2737 @@ -2082,13 +2132,8 @@ int __init io_apic_get_redir_entries (in
2738  }
2739  
2740  
2741 -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
2742 +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
2743  {
2744 -       struct IO_APIC_route_entry entry;
2745 -       unsigned long flags;
2746 -       int vector;
2747 -       cpumask_t mask;
2748 -
2749         if (!IO_APIC_IRQ(irq)) {
2750                 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",