From: Hans-Joachim Picht <hpicht@redhat.com> Date: Thu, 12 Mar 2009 15:22:03 +0100 Subject: [s390] kernel: NSS Support Message-id: 20090312142203.GB5103@redhat.com O-Subject: [RHEL5 U4 PATCH 1/20] FEAT: s390 - kernel: NSS Support Bugzilla: 474646 RH-Acked-by: Pete Zaitcev <zaitcev@redhat.com> Description ============ This feature allows to save and IPL a kernel image in/from a shared memory area called Named Saved Segment(NSS). It utilizes the memory resources and significantly speeds up the boot process in a horizontally scaling server hosting environment. If a Linux guest machine is IPLed from a Kernel NSS with more than one CPU, this support requires z/VM 5.1 with PTF for APAR VM64103, z/VM 5.2 with PTF for APAR VM64103, or z/VM 5.3 without PTF. Linux guest machines using this support are restricted to run with one CPU if they run on releases prior to z/VM 5.1. Bugzilla ========= BZ 474646 https://bugzilla.redhat.com/show_bug.cgi?id=474646 Upstream status of the patch: ============================= The patch is upstream as of git commit fe355b7f1c7400cbb71762a1237461be03f88265 Test status: ============ The patch has been tested by the IBM test department. Please ACK. With best regards, --Hans diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 4ef44e5..24a7e23 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -25,7 +25,6 @@ static char cpcmd_buf[241]; */ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) { - const int mask = 0x40000000L; unsigned long flags; int return_code; int return_len; @@ -37,65 +36,48 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) memcpy(cpcmd_buf, cmd, cmdlen); ASCEBC(cpcmd_buf, cmdlen); - if (response != NULL && rlen > 0) { + if (response != NULL && rlen > 0) memset(response, 0, rlen); + + if (response != NULL && rlen > 0) { + register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; + register unsigned long reg3 asm ("3") = (addr_t) response; + register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L; + register unsigned long reg5 asm ("5") = rlen; + + asm volatile( #ifndef CONFIG_64BIT - asm volatile ( "lra 2,0(%2)\n" - "lr 4,%3\n" - "o 4,%6\n" - "lra 3,0(%4)\n" - "lr 5,%5\n" - "diag 2,4,0x8\n" - "brc 8, 1f\n" - "ar 5, %5\n" - "1: \n" - "lr %0,4\n" - "lr %1,5\n" - : "=d" (return_code), "=d" (return_len) - : "a" (cpcmd_buf), "d" (cmdlen), - "a" (response), "d" (rlen), "m" (mask) - : "cc", "2", "3", "4", "5" ); + " diag %2,%0,0x8\n" + " brc 8,1f\n" + " ar %1,%4\n" #else /* CONFIG_64BIT */ - asm volatile ( "lrag 2,0(%2)\n" - "lgr 4,%3\n" - "o 4,%6\n" - "lrag 3,0(%4)\n" - "lgr 5,%5\n" - "sam31\n" - "diag 2,4,0x8\n" - "sam64\n" - "brc 8, 1f\n" - "agr 5, %5\n" - "1: \n" - "lgr %0,4\n" - "lgr %1,5\n" - : "=d" (return_code), "=d" (return_len) - : "a" (cpcmd_buf), "d" (cmdlen), - "a" (response), "d" (rlen), "m" (mask) - : "cc", "2", "3", "4", "5" ); + " sam31\n" + " diag %2,%0,0x8\n" + " sam64\n" + " brc 8,1f\n" + " agr %1,%4\n" #endif /* CONFIG_64BIT */ + "1:\n" + : "+d" (reg4), "+d" (reg5) + : "d" (reg2), "d" (reg3), "d" (rlen) : "cc"); + return_code = (int) reg4; + return_len = (int) reg5; EBCASC(response, rlen); } else { + register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; + register unsigned long reg3 asm ("3") = cmdlen; + return_len = 0; + asm volatile( #ifndef CONFIG_64BIT - asm volatile ( "lra 2,0(%1)\n" - "lr 3,%2\n" - "diag 2,3,0x8\n" - "lr %0,3\n" - : "=d" (return_code) - : "a" (cpcmd_buf), "d" (cmdlen) - : "2", "3" ); + " diag %1,%0,0x8\n" #else /* CONFIG_64BIT */ - asm volatile ( "lrag 2,0(%1)\n" - "lgr 3,%2\n" - "sam31\n" - "diag 2,3,0x8\n" - "sam64\n" - "lgr %0,3\n" - : "=d" (return_code) - : "a" (cpcmd_buf), "d" (cmdlen) - : "2", "3" ); + " sam31\n" + " diag %1,%0,0x8\n" + " sam64\n" #endif /* CONFIG_64BIT */ + : "+d" (reg3) : "d" (reg2) : "cc"); + return_code = (int) reg3; } spin_unlock_irqrestore(&cpcmd_lock, flags); if (response_code != NULL) diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index bcad2a5..9ac410b 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -418,24 +418,6 @@ start: .gotr: l %r10,.tbl # EBCDIC to ASCII table tr 0(240,%r8),0(%r10) - stidp __LC_CPUID # Are we running on VM maybe - cli __LC_CPUID,0xff - bnz .test - .long 0x83300060 # diag 3,0,x'0060' - storage size - b .done -.test: - mvc 0x68(8),.pgmnw # set up pgm check handler - l %r2,.fourmeg - lr %r3,%r2 - bctr %r3,%r0 # 4M-1 -.loop: iske %r0,%r3 - ar %r3,%r2 -.pgmx: - sr %r3,%r2 - la %r3,1(%r3) -.done: - l %r1,.memsize - st %r3,ARCH_OFFSET(%r1) slr %r0,%r0 st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11) st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11) @@ -443,9 +425,6 @@ start: .tbl: .long _ebcasc # translate table .cmd: .long COMMAND_LINE # address of command line buffer .parm: .long PARMAREA -.memsize: .long memory_size -.fourmeg: .long 0x00400000 # 4M -.pgmnw: .long 0x00080000,.pgmx .lowcase: .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07 .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f @@ -481,65 +460,6 @@ start: .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7 .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff -.macro GET_IPL_DEVICE -.Lget_ipl_device: - l %r1,0xb8 # get sid - sll %r1,15 # test if subchannel is enabled - srl %r1,31 - ltr %r1,%r1 - bz 2f-.LPG1(%r13) # subchannel disabled - l %r1,0xb8 - la %r5,.Lipl_schib-.LPG1(%r13) - stsch 0(%r5) # get schib of subchannel - bnz 2f-.LPG1(%r13) # schib not available - tm 5(%r5),0x01 # devno valid? - bno 2f-.LPG1(%r13) - la %r6,ipl_parameter_flags-.LPG1(%r13) - oi 3(%r6),0x01 # set flag - la %r2,ipl_devno-.LPG1(%r13) - mvc 0(2,%r2),6(%r5) # store devno - tm 4(%r5),0x80 # qdio capable device? - bno 2f-.LPG1(%r13) - oi 3(%r6),0x02 # set flag - - # copy ipl parameters - - lhi %r0,4096 - l %r2,20(%r0) # get address of parameter list - lhi %r3,IPL_PARMBLOCK_ORIGIN - st %r3,20(%r0) - lhi %r4,1 - cr %r2,%r3 # start parameters < destination ? - jl 0f - lhi %r1,1 # copy direction is upwards - j 1f -0: lhi %r1,-1 # copy direction is downwards - ar %r2,%r0 - ar %r3,%r0 - ar %r2,%r1 - ar %r3,%r1 -1: mvc 0(1,%r3),0(%r2) # finally copy ipl parameters - ar %r3,%r1 - ar %r2,%r1 - sr %r0,%r4 - jne 1b - b 2f-.LPG1(%r13) - - .align 4 -.Lipl_schib: - .rept 13 - .long 0 - .endr - - .globl ipl_parameter_flags -ipl_parameter_flags: - .long 0 - .globl ipl_devno -ipl_devno: - .word 0 -2: -.endm - #ifdef CONFIG_64BIT #include "head64.S" #else diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index 41dd161..ec329ae 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -38,23 +38,25 @@ startup:basr %r13,0 # get base startup_continue: basr %r13,0 # get base .LPG1: mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) - GET_IPL_DEVICE lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers l %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore mvc __LC_IPLDEV(4),IPL_DEVICE-PARMAREA(%r12) - # -# clear bss memory +# Setup stack # - l %r2,.Lbss_bgn-.LPG1(%r13) # start of bss - l %r3,.Lbss_end-.LPG1(%r13) # end of bss - sr %r3,%r2 # length of bss - sr %r4,%r4 - sr %r5,%r5 # set src,length and pad to zero - sr %r0,%r0 - mvcle %r2,%r4,0 # clear mem - jo .-4 # branch back, if not finish + l %r15,.Linittu-.LPG1(%r13) + mvc __LC_CURRENT(4),__TI_task(%r15) + ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE + st %r15,__LC_KERNEL_STACK # set end of kernel stack + ahi %r15,-96 + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain +# +# Save ipl parameters, clear bss memory, initialize storage key for kernel pages, +# and create a kernel NSS if the SAVESYS= parm is defined +# + l %r14,.Lstartup_init-.LPG1(%r13) + basr %r14,%r14 l %r2,.Lrcp-.LPG1(%r13) # Read SCP forced command word .Lservicecall: @@ -115,6 +117,10 @@ startup_continue: b .Lfchunk-.LPG1(%r13) .align 4 +.Linittu: + .long init_thread_union +.Lstartup_init: + .long startup_init .Lpmask: .byte 0 .align 8 @@ -142,18 +148,21 @@ startup_continue: slr %r4,%r4 # set start of chunk to zero slr %r5,%r5 # set end of chunk to zero slr %r6,%r6 # set access code to zero - la %r10, MEMORY_CHUNKS # number of chunks + la %r10,MEMORY_CHUNKS # number of chunks .Lloop: tprot 0(%r5),0 # test protection of first byte ipm %r7 srl %r7,28 clr %r6,%r7 # compare cc with last access code be .Lsame-.LPG1(%r13) - b .Lchkmem-.LPG1(%r13) + lhi %r8,0 # no program checks + b .Lsavchk-.LPG1(%r13) .Lsame: ar %r5,%r1 # add 128KB to end of chunk bno .Lloop-.LPG1(%r13) # r1 < 0x80000000 -> loop .Lchkmem: # > 2GB or tprot got a program check + lhi %r8,1 # set program check flag +.Lsavchk: clr %r4,%r5 # chunk size > 0? be .Lchkloop-.LPG1(%r13) st %r4,0(%r3) # store start address of chunk @@ -162,8 +171,6 @@ startup_continue: st %r0,4(%r3) # store size of chunk st %r6,8(%r3) # store type of chunk la %r3,12(%r3) - l %r4,.Lmemsize-.LPG1(%r13) # address of variable memory_size - st %r5,0(%r4) # store last end to memory size ahi %r10,-1 # update chunk number .Lchkloop: lr %r6,%r7 # set access code to last cc @@ -177,27 +184,20 @@ startup_continue: je .Ldonemem # if not, leave chi %r10,0 # do we have chunks left? je .Ldonemem + chi %r8,1 # program check ? + je .Lpgmchk + lr %r4,%r5 # potential new chunk + alr %r5,%r1 # add 128KB to end of chunk + j .Llpcnt +.Lpgmchk: alr %r5,%r1 # add 128KB to end of chunk lr %r4,%r5 # potential new chunk +.Llpcnt: clr %r5,%r9 # should we go on? jl .Lloop .Ldonemem: l %r12,.Lmflags-.LPG1(%r13) # get address of machine_flags # -# find out if we are running under VM -# - stidp __LC_CPUID # store cpuid - tm __LC_CPUID,0xff # running under VM ? - bno .Lnovm-.LPG1(%r13) - oi 3(%r12),1 # set VM flag -.Lnovm: - lh %r0,__LC_CPUID+4 # get cpu version - chi %r0,0x7490 # running on a P/390 ? - bne .Lnop390-.LPG1(%r13) - oi 3(%r12),4 # set P/390 flag -.Lnop390: - -# # find out if we have an IEEE fpu # mvc __LC_PGM_NEW_PSW(8),.Lpcfpu-.LPG1(%r13) @@ -278,13 +278,25 @@ startup_continue: .Lpcmvpg:.long 0x00080000,0x80000000 + .Lchkmvpg .Lpcidte:.long 0x00080000,0x80000000 + .Lchkidte .Lpcdiag9c:.long 0x00080000,0x80000000 + .Lchkdiag9c -.Lmemsize:.long memory_size .Lmchunk:.long memory_chunk .Lmflags:.long machine_flags .Lbss_bgn: .long __bss_start .Lbss_end: .long _end .Lparmaddr: .long PARMAREA .Lsccbaddr: .long .Lsccb + .globl ipl_schib +ipl_schib: + .rept 13 + .long 0 + .endr + + .globl ipl_flags +ipl_flags: + .long 0 + .globl ipl_devno +ipl_devno: + .word 0 + .org 0x12000 .globl s390_readinfo_sccb s390_readinfo_sccb: @@ -316,16 +328,6 @@ s390_readinfo_sccb: .globl _stext _stext: basr %r13,0 # get base .LPG3: -# -# Setup stack -# - l %r15,.Linittu-.LPG3(%r13) - mvc __LC_CURRENT(4),__TI_task(%r15) - ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE - st %r15,__LC_KERNEL_STACK # set end of kernel stack - ahi %r15,-96 - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain - # check control registers stctl %c0,%c15,0(%r15) oi 2(%r15),0x40 # enable sigp emergency signal @@ -344,6 +346,5 @@ _stext: basr %r13,0 # get base # .align 8 .Ldw: .long 0x000a0000,0x00000000 -.Linittu:.long init_thread_union .Lstart:.long start_kernel .Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 587625b..47ebd9f 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -99,7 +99,6 @@ startup_continue: #endif /* CONFIG_ZFCPDUMP */ mvi __LC_AR_MODE_ID,1 # set esame flag - GET_IPL_DEVICE lhi %r1,1 # mode 1 = esame mvi __LC_AR_MODE_ID,1 # set esame flag slr %r0,%r0 # set cpuid to zero @@ -111,15 +110,20 @@ startup_continue: mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) # -# clear bss memory +# Setup stack +# + larl %r15,init_thread_union + lg %r14,__TI_task(%r15) # cache current in lowcore + stg %r14,__LC_CURRENT + aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE + stg %r15,__LC_KERNEL_STACK # set end of kernel stack + aghi %r15,-160 + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain +# +# Save ipl parameters, clear bss memory, initialize storage key for kernel pages, +# and create a kernel NSS if the SAVESYS= parm is defined # - larl %r2,__bss_start # start of bss segment - larl %r3,_end # end of bss segment - sgr %r3,%r2 # length of bss - sgr %r4,%r4 # - sgr %r5,%r5 # set src,length and pad to zero - mvcle %r2,%r4,0 # clear mem - jo .-4 # branch back, if not finish + brasl %r14,startup_init l %r2,.Lrcp-.LPG1(%r13) # Read SCP forced command word .Lservicecall: @@ -211,12 +215,15 @@ startup_continue: srl %r7,28 clr %r6,%r7 # compare cc with last access code je .Lsame - j .Lchkmem + lghi %r8,0 # no program checks + j .Lsavchk .Lsame: algr %r5,%r1 # add 128KB to end of chunk # no need to check here, brc 12,.Lloop # this is the same chunk .Lchkmem: # > 16EB or tprot got a program check + lghi %r8,1 # set program check flag +.Lsavchk: clgr %r4,%r5 # chunk size > 0? je .Lchkloop stg %r4,0(%r3) # store start address of chunk @@ -225,8 +232,6 @@ startup_continue: stg %r0,8(%r3) # store size of chunk st %r6,20(%r3) # store type of chunk la %r3,24(%r3) - larl %r8,memory_size - stg %r5,0(%r8) # store memory size ahi %r10,-1 # update chunk number .Lchkloop: lr %r6,%r7 # set access code to last cc @@ -245,27 +250,21 @@ startup_continue: chi %r10, 0 # do we have chunks left? je .Ldonemem .Lhsaskip: + chi %r8,1 # program check ? + je .Lpgmchk + lgr %r4,%r5 # potential new chunk + algr %r5,%r1 # add 128KB to end of chunk + j .Llpcnt +.Lpgmchk: algr %r5,%r1 # add 128KB to end of chunk lgr %r4,%r5 # potential new chunk +.Llpcnt: clgr %r5,%r9 # should we go on? jl .Lloop .Ldonemem: larl %r12,machine_flags # -# find out if we are running under VM -# - stidp __LC_CPUID # store cpuid - tm __LC_CPUID,0xff # running under VM ? - bno 0f-.LPG1(%r13) - oi 7(%r12),1 # set VM flag -0: lh %r0,__LC_CPUID+4 # get cpu version - chi %r0,0x7490 # running on a P/390 ? - bne 1f-.LPG1(%r13) - oi 7(%r12),4 # set P/390 flag -1: - -# # find out if we have the MVPG instruction # la %r1,0f-.LPG1(%r13) # set program check address @@ -358,6 +357,18 @@ startup_continue: .Lparmaddr: .quad PARMAREA + .globl ipl_schib +ipl_schib: + .rept 13 + .long 0 + .endr + .globl ipl_flags +ipl_flags: + .long 0 + .globl ipl_devno +ipl_devno: + .word 0 + .org 0x12000 .globl s390_readinfo_sccb s390_readinfo_sccb: @@ -389,24 +400,12 @@ s390_readinfo_sccb: .globl _stext _stext: basr %r13,0 # get base .LPG3: -# -# Setup stack -# - larl %r15,init_thread_union - lg %r14,__TI_task(%r15) # cache current in lowcore - stg %r14,__LC_CURRENT - aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE - stg %r15,__LC_KERNEL_STACK # set end of kernel stack - aghi %r15,-160 - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain - # check control registers stctg %c0,%c15,0(%r15) oi 6(%r15),0x40 # enable sigp emergency signal oi 4(%r15),0x10 # switch on low address proctection lctlg %c0,%c15,0(%r15) -# lam 0,15,.Laregs-.LPG3(%r13) # load access regs needed by uaccess brasl %r14,start_kernel # go to C code # @@ -414,7 +413,7 @@ _stext: basr %r13,0 # get base # basr %r13,0 lpswe .Ldw-.(%r13) # load disabled wait psw -# + .align 8 .Ldw: .quad 0x0002000180000000,0x0000000000000000 .Laregs: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 55b0a0d..7c199d0 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -32,6 +32,7 @@ extern char s390_readinfo_sccb[]; #define IPL_UNKNOWN_STR "unknown" #define IPL_CCW_STR "ccw" #define IPL_FCP_STR "fcp" +#define IPL_NSS_STR "nss" #define IPL_FCP_DUMP_STR "fcp_dump" static char *ipl_type_str(enum ipl_type type) @@ -43,6 +44,8 @@ static char *ipl_type_str(enum ipl_type type) return IPL_FCP_STR; case IPL_TYPE_FCP_DUMP: return IPL_FCP_DUMP_STR; + case IPL_TYPE_NSS: + return IPL_NSS_STR; case IPL_TYPE_UNKNOWN: default: return IPL_UNKNOWN_STR; @@ -80,6 +83,7 @@ enum ipl_method { REIPL_METHOD_FCP_RO_DIAG, REIPL_METHOD_FCP_RW_DIAG, REIPL_METHOD_FCP_RO_VM, + REIPL_METHOD_NSS, REIPL_METHOD_FCP_DUMP, REIPL_METHOD_DEFAULT, }; @@ -125,6 +129,8 @@ static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT; static struct ipl_parameter_block *reipl_block_fcp; static struct ipl_parameter_block *reipl_block_ccw; +static char reipl_nss_name[NSS_NAME_SIZE + 1]; + static int dump_capabilities = DUMP_TYPE_NONE; static enum dump_type dump_type = DUMP_TYPE_NONE; static enum dump_method dump_method = DUMP_METHOD_NONE; @@ -189,6 +195,24 @@ static struct subsys_attribute sys_##_prefix##_##_name##_attr = \ sys_##_prefix##_##_name##_show, \ sys_##_prefix##_##_name##_store); +#define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\ +static ssize_t sys_##_prefix##_##_name##_show(struct subsystem *subsys, \ + char *page) \ +{ \ + return sprintf(page, _fmt_out, _value); \ +} \ +static ssize_t sys_##_prefix##_##_name##_store(struct subsystem *subsys,\ + const char *buf, size_t len) \ +{ \ + if (sscanf(buf, _fmt_in, _value) != 1) \ + return -EINVAL; \ + return len; \ +} \ +static struct subsys_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name,(S_IRUGO | S_IWUSR), \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store); + static void make_attrs_ro(struct attribute **attrs) { while (*attrs) { @@ -205,9 +229,11 @@ static enum ipl_type get_ipl_type(void) { struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; - if (!(ipl_parameter_flags & IPL_DEVNO_VALID)) + if (ipl_flags & IPL_NSS_VALID) + return IPL_TYPE_NSS; + if (!(ipl_flags & IPL_DEVNO_VALID)) return IPL_TYPE_UNKNOWN; - if (!(ipl_parameter_flags & IPL_PARMBLOCK_VALID)) + if (!(ipl_flags & IPL_PARMBLOCK_VALID)) return IPL_TYPE_CCW; if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION) return IPL_TYPE_UNKNOWN; @@ -234,6 +260,7 @@ void __init setup_ipl_info(void) ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn; ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun; break; + case IPL_TYPE_NSS: case IPL_TYPE_UNKNOWN: default: /* We have no info to copy */ @@ -368,6 +395,20 @@ static struct attribute_group ipl_ccw_attr_group = { .attrs = ipl_ccw_attrs, }; +/* NSS ipl device attributes */ + +DEFINE_IPL_ATTR_RO(ipl_nss, name, "%s\n", kernel_nss_name); + +static struct attribute *ipl_nss_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_nss_name_attr.attr, + NULL, +}; + +static struct attribute_group ipl_nss_attr_group = { + .attrs = ipl_nss_attrs, +}; + /* UNKNOWN ipl device attributes */ static struct attribute *ipl_unknown_attrs[] = { @@ -476,6 +517,21 @@ static struct attribute_group reipl_ccw_attr_group = { .attrs = reipl_ccw_attrs, }; + +/* NSS reipl device attributes */ + +DEFINE_IPL_ATTR_STR_RW(reipl_nss, name, "%s\n", "%s\n", reipl_nss_name); + +static struct attribute *reipl_nss_attrs[] = { + &sys_reipl_nss_name_attr.attr, + NULL, +}; + +static struct attribute_group reipl_nss_attr_group = { + .name = IPL_NSS_STR, + .attrs = reipl_nss_attrs, +}; + /* reipl type */ static int reipl_set_type(enum ipl_type type) @@ -504,6 +560,9 @@ static int reipl_set_type(enum ipl_type type) case IPL_TYPE_UNKNOWN: reipl_method = REIPL_METHOD_DEFAULT; break; + case IPL_TYPE_NSS: + reipl_method = REIPL_METHOD_NSS; + break; default: BUG(); } @@ -525,6 +584,8 @@ static ssize_t reipl_type_store(struct subsystem *subsys, const char *buf, rc = reipl_set_type(IPL_TYPE_CCW); else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0) rc = reipl_set_type(IPL_TYPE_FCP); + else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_NSS); return (rc != 0) ? rc : len; } @@ -716,6 +777,10 @@ void do_reipl(void) case REIPL_METHOD_FCP_RO_VM: cpcmd("IPL", NULL, 0, NULL); break; + case REIPL_METHOD_NSS: + sprintf(buf, "IPL %s", reipl_nss_name); + __cpcmd(buf, NULL, 0, NULL); + break; case REIPL_METHOD_DEFAULT: if (MACHINE_IS_VM) cpcmd("IPL", NULL, 0, NULL); @@ -808,6 +873,10 @@ static int __init ipl_init(void) case IPL_TYPE_FCP_DUMP: rc = ipl_register_fcp_files(); break; + case IPL_TYPE_NSS: + rc = sysfs_create_group(&ipl_subsys.kset.kobj, + &ipl_nss_attr_group); + break; default: rc = sysfs_create_group(&ipl_subsys.kset.kobj, &ipl_unknown_attr_group); @@ -832,6 +901,20 @@ static void __init reipl_probe(void) free_page((unsigned long)buffer); } +static int __init reipl_nss_init(void) +{ + int rc; + + if (!MACHINE_IS_VM) + return 0; + rc = sysfs_create_group(&reipl_subsys.kset.kobj, &reipl_nss_attr_group); + if (rc) + return rc; + strncpy(reipl_nss_name, kernel_nss_name, NSS_NAME_SIZE + 1); + reipl_capabilities |= IPL_TYPE_NSS; + return 0; +} + static int __init reipl_ccw_init(void) { int rc; @@ -913,6 +996,9 @@ static int __init reipl_init(void) rc = reipl_fcp_init(); if (rc) return rc; + rc = reipl_nss_init(); + if (rc) + return rc; rc = reipl_set_type(ipl_info.type); if (rc) return rc; diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 60b1ea9..d02165c 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -22,6 +22,7 @@ #include <asm/pgalloc.h> #include <asm/system.h> #include <asm/smp.h> +#include <asm/ipl.h> static void kexec_halt_all_cpus(void *); @@ -35,6 +36,10 @@ machine_kexec_prepare(struct kimage *image) { unsigned long reboot_code_buffer; + /* Can't replace kernel image since it is read-only. */ + if (ipl_flags & IPL_NSS_VALID) + return -ENOSYS; + /* We don't support anything but the default image type for now. */ if (image->type != KEXEC_TYPE_DEFAULT) return -EINVAL; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index fe82c0f..6b4a019 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -38,6 +38,7 @@ #include <linux/device.h> #include <linux/notifier.h> #include <linux/topology.h> +#include <linux/ctype.h> #include <asm/ipl.h> #include <asm/uaccess.h> @@ -51,6 +52,7 @@ #include <asm/ptrace.h> #include <asm/sections.h> #include <asm/cio.h> +#include <asm/ebcdic.h> /* * Machine setup.. @@ -58,15 +60,11 @@ unsigned int console_mode = 0; unsigned int console_devno = -1; unsigned int console_irq = -1; -unsigned long memory_size = 0; unsigned long machine_flags = 0; unsigned long elf_hwcap = 0; + +struct mem_chunk memory_chunk[MEMORY_CHUNKS]; char elf_platform[ELF_PLATFORM_SIZE]; -struct { - unsigned long addr, size, type; -} memory_chunk[MEMORY_CHUNKS] = { { 0 } }; -#define CHUNK_READ_WRITE 0 -#define CHUNK_READ_ONLY 1 volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ unsigned long __initdata zholes_size[MAX_NR_ZONES]; static unsigned long __initdata memory_end; @@ -298,6 +296,142 @@ static void __init setup_zfcpdump(unsigned int console_devno) static inline void setup_zfcpdump(unsigned int console_devno) {} #endif /* CONFIG_ZFCPDUMP */ +/* + * Create a Kernel NSS if the SAVESYS= parameter is defined +*/ +#define DEFSYS_CMD_SIZE 96 +#define SAVESYS_CMD_SIZE 32 + +char kernel_nss_name[NSS_NAME_SIZE + 1]; + +#ifdef CONFIG_SHARED_KERNEL +static __init void create_kernel_nss(void) +{ + unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size; +#ifdef CONFIG_BLK_DEV_INITRD + unsigned int sinitrd_pfn, einitrd_pfn; +#endif + int response; + char *savesys_ptr; + char upper_command_line[COMMAND_LINE_SIZE]; + char defsys_cmd[DEFSYS_CMD_SIZE]; + char savesys_cmd[SAVESYS_CMD_SIZE]; + + /* Do nothing if we are not running under VM */ + if (!MACHINE_IS_VM) + return; + + /* Convert COMMAND_LINE to upper case */ + for (i = 0; i < strlen(COMMAND_LINE); i++) + upper_command_line[i] = toupper(COMMAND_LINE[i]); + + savesys_ptr = strstr(upper_command_line, "SAVESYS="); + + if (!savesys_ptr) + return; + + savesys_ptr += 8; /* Point to the beginning of the NSS name */ + for (i = 0; i < NSS_NAME_SIZE; i++) { + if (savesys_ptr[i] == ' ' || savesys_ptr[i] == '\0') + break; + kernel_nss_name[i] = savesys_ptr[i]; + } + + stext_pfn = PFN_DOWN(__pa(&_stext)); + eshared_pfn = PFN_DOWN(__pa(&_eshared)); + end_pfn = PFN_UP(__pa(&_end)); + min_size = end_pfn << 2; + + sprintf(defsys_cmd, "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", + kernel_nss_name, stext_pfn - 1, stext_pfn, eshared_pfn - 1, + eshared_pfn, end_pfn); + +#ifdef CONFIG_BLK_DEV_INITRD + if (INITRD_START && INITRD_SIZE) { + sinitrd_pfn = PFN_DOWN(__pa(INITRD_START)); + einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE)); + min_size = einitrd_pfn << 2; + sprintf(defsys_cmd, "%s EW %.5X-%.5X", defsys_cmd, + sinitrd_pfn, einitrd_pfn); + } +#endif + + sprintf(defsys_cmd, "%s EW MINSIZE=%.7iK", defsys_cmd, min_size); + sprintf(savesys_cmd, "SAVESYS %s \n IPL %s", + kernel_nss_name, kernel_nss_name); + + __cpcmd(defsys_cmd, NULL, 0, &response); + + if (response != 0) { + kernel_nss_name[0] = '\0'; + return; + } + + __cpcmd(savesys_cmd, NULL, 0, &response); + + if (response != strlen(savesys_cmd)) { + kernel_nss_name[0] = '\0'; + return; + } + + ipl_flags = IPL_NSS_VALID; +} + +#else /* CONFIG_SHARED_KERNEL */ + +static inline void create_kernel_nss(void) { } + +#endif /* CONFIG_SHARED_KERNEL */ + +/* + * Clear bss memory + */ +static __init void clear_bss_section(void) +{ + memset(__bss_start, 0, __bss_stop - __bss_start); +} + +/* + * Initialize storage key for kernel pages + */ +static __init void init_kernel_storage_key(void) +{ + unsigned long end_pfn, init_pfn; + + end_pfn = PFN_UP(__pa(&_end)); + + for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) + page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY); +} + +static __init void detect_machine_type(void) +{ + struct cpuinfo_S390 *cpuinfo = &S390_lowcore.cpu_data; + + asm volatile("stidp %0" : "=m" (S390_lowcore.cpu_data.cpu_id)); + + /* Running under z/VM ? */ + if (cpuinfo->cpu_id.version == 0xff) + machine_flags |= 1; + + /* Running on a P/390 ? */ + if (cpuinfo->cpu_id.machine == 0x7490) + machine_flags |= 4; +} + +/* + * Save ipl parameters, clear bss memory, initialize storage keys + * and create a kernel NSS at startup if the SAVESYS= parm is defined + */ +void __init startup_init(void) +{ + ipl_save_parameters(); + clear_bss_section(); + init_kernel_storage_key(); + detect_machine_type(); + create_kernel_nss(); +} + #ifdef CONFIG_SMP extern void machine_restart_smp(char *); extern void machine_halt_smp(void); @@ -478,7 +612,7 @@ setup_lowcore(void) static void __init setup_resources(void) { - struct resource *res; + struct resource *res, *sub_res; int i; code_resource.start = (unsigned long) &_text; @@ -503,11 +637,72 @@ setup_resources(void) res->start = memory_chunk[i].addr; res->end = memory_chunk[i].addr + memory_chunk[i].size - 1; request_resource(&iomem_resource, res); - request_resource(res, &code_resource); - request_resource(res, &data_resource); + + if (code_resource.start >= res->start && + code_resource.start <= res->end && + code_resource.end > res->end) { + sub_res = alloc_bootmem_low(sizeof(struct resource)); + memcpy(sub_res, &code_resource, + sizeof(struct resource)); + sub_res->end = res->end; + code_resource.start = res->end + 1; + request_resource(res, sub_res); + } + + if (code_resource.start >= res->start && + code_resource.start <= res->end && + code_resource.end <= res->end) + request_resource(res, &code_resource); + + if (data_resource.start >= res->start && + data_resource.start <= res->end && + data_resource.end > res->end) { + sub_res = alloc_bootmem_low(sizeof(struct resource)); + memcpy(sub_res, &data_resource, + sizeof(struct resource)); + sub_res->end = res->end; + data_resource.start = res->end + 1; + request_resource(res, sub_res); + } + + if (data_resource.start >= res->start && + data_resource.start <= res->end && + data_resource.end <= res->end) + request_resource(res, &data_resource); } } +static void __init setup_memory_end(void) +{ + unsigned long real_size, memory_size; + unsigned long max_mem, max_phys; + int i; + + memory_size = real_size = 0; + max_phys = VMALLOC_END - VMALLOC_MIN_SIZE; + memory_end &= PAGE_MASK; + + max_mem = memory_end ? min(max_phys, memory_end) : max_phys; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + struct mem_chunk *chunk = &memory_chunk[i]; + + real_size = max(real_size, chunk->addr + chunk->size); + if (chunk->addr >= max_mem) { + memset(chunk, 0, sizeof(*chunk)); + continue; + } + if (chunk->addr + chunk->size > max_mem) + chunk->size = max_mem - chunk->addr; + memory_size = max(memory_size, chunk->addr + chunk->size); + } + if (!memory_end) + memory_end = memory_size; + if (real_size > memory_end) + printk("More memory detected than supported. Unused: %luk\n", + (real_size - memory_end) >> 10); +} + unsigned long real_memory_size; EXPORT_SYMBOL_GPL(real_memory_size); @@ -526,10 +721,6 @@ setup_memory(void) start_pfn = (__pa(&_end) + PAGE_SIZE - 1) >> PAGE_SHIFT; end_pfn = max_pfn = memory_end >> PAGE_SHIFT; - /* Initialize storage key for kernel pages */ - for (init_pfn = 0 ; init_pfn < start_pfn; init_pfn++) - page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY); - /* * Initialize the boot-time allocator (with low memory only): */ @@ -736,24 +927,9 @@ setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) &_edata; init_mm.brk = (unsigned long) &_end; - memory_end = memory_size; - parse_early_param(); -#ifndef CONFIG_64BIT - memory_end &= ~0x400000UL; - - /* - * We need some free virtual space to be able to do vmalloc. - * On a machine with 2GB memory we make sure that we have at - * least 128 MB free space for vmalloc. - */ - if (memory_end > 1920*1024*1024) - memory_end = 1920*1024*1024; -#else /* CONFIG_64BIT */ - memory_end &= ~0x200000UL; -#endif /* CONFIG_64BIT */ - + setup_memory_end(); setup_ipl_info(); #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE) if (ipl_info.type == IPL_TYPE_FCP_DUMP) { diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 5630caf..5bb46b6 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -31,11 +31,6 @@ SECTIONS _etext = .; /* End of text section */ - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; - RODATA #ifdef CONFIG_SHARED_KERNEL @@ -44,6 +39,11 @@ SECTIONS _eshared = .; /* End of shareable data */ #endif + . = ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + .data : { /* Data */ *(.data) MARKERS_DATA diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 9b11e3e..39ec327 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -14,12 +14,13 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/bootmem.h> +#include <linux/ctype.h> #include <asm/page.h> #include <asm/ebcdic.h> #include <asm/errno.h> #include <asm/extmem.h> #include <asm/cpcmd.h> -#include <linux/ctype.h> +#include <asm/setup.h> #define DCSS_DEBUG /* Debug messages on/off */ @@ -82,10 +83,6 @@ static struct list_head dcss_list = LIST_HEAD_INIT(dcss_list); static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", "EW/EN-MIXED" }; -extern struct { - unsigned long addr, size, type; -} memory_chunk[MEMORY_CHUNKS]; - /* * Create the 8 bytes, ebcdic VM segment name from * an ascii name. @@ -249,8 +246,8 @@ segment_overlaps_storage(struct dcss_segment *seg) { int i; - for (i=0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - if (memory_chunk[i].type != 0) + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (memory_chunk[i].type != CHUNK_READ_WRITE) continue; if ((memory_chunk[i].addr >> 20) > (seg->end >> 20)) continue; diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index a6df5e4..b1f8348 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -21,6 +21,8 @@ #include <asm/delay.h> #include <asm/irq.h> #include <asm/chpid.h> +#include <asm/setup.h> +#include <asm/ipl.h> #include "airq.h" #include "cio.h" @@ -901,3 +903,38 @@ void reipl_ccw_dev(struct ccw_dev_id *devid) cio_reset_channel_paths(); do_reipl_asm(*((__u32*)&schid)); } + +extern struct schib ipl_schib; + +/* + * ipl_save_parameters gets called very early. It is not allowed to access + * anything in the bss section at all. The bss section is not cleared yet, + * but may contain some ipl parameters written by the firmware. + * These parameters (if present) are copied to 0x2000. + * To avoid corruption of the ipl parameters, all variables used by this + * function must reside on the stack or in the data section. + */ +void ipl_save_parameters(void) +{ + struct subchannel_id schid; + unsigned int *ipl_ptr; + void *src, *dst; + + schid = *(struct subchannel_id *)__LC_SUBCHANNEL_ID; + if (!schid.one) + return; + if (stsch(schid, &ipl_schib)) + return; + if (!ipl_schib.pmcw.dnv) + return; + ipl_devno = ipl_schib.pmcw.dev; + ipl_flags |= IPL_DEVNO_VALID; + if (!ipl_schib.pmcw.qf) + return; + ipl_flags |= IPL_PARMBLOCK_VALID; + ipl_ptr = (unsigned int *)__LC_IPL_PARMBLOCK_PTR; + src = (void *)(unsigned long)*ipl_ptr; + dst = (void *)IPL_PARMBLOCK_ORIGIN; + memmove(dst, src, PAGE_SIZE); + *ipl_ptr = IPL_PARMBLOCK_ORIGIN; +} diff --git a/include/asm-s390/ipl.h b/include/asm-s390/ipl.h index c0241d9..b57058a 100644 --- a/include/asm-s390/ipl.h +++ b/include/asm-s390/ipl.h @@ -96,6 +96,7 @@ enum ipl_type { IPL_TYPE_CCW = 2, IPL_TYPE_FCP = 4, IPL_TYPE_FCP_DUMP = 8, + IPL_TYPE_NSS = 16, }; struct ipl_info diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h index 9b05b5a..57af032 100644 --- a/include/asm-s390/lowcore.h +++ b/include/asm-s390/lowcore.h @@ -35,6 +35,7 @@ #define __LC_IO_NEW_PSW 0x01f0 #endif /* !__s390x__ */ +#define __LC_IPL_PARMBLOCK_PTR 0x014 #define __LC_EXT_PARAMS 0x080 #define __LC_CPU_ADDRESS 0x084 #define __LC_EXT_INT_CODE 0x086 diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h index 58b6643..eddeb1b 100644 --- a/include/asm-s390/page.h +++ b/include/asm-s390/page.h @@ -192,6 +192,8 @@ page_get_storage_key(unsigned long addr) /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) #define __PAGE_OFFSET 0x0UL #define PAGE_OFFSET 0x0UL diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 3515ed8..508d659 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -110,13 +110,22 @@ extern char empty_zero_page[PAGE_SIZE]; #define VMALLOC_OFFSET (8*1024*1024) #define VMALLOC_START (((unsigned long) high_memory + VMALLOC_OFFSET) \ & ~(VMALLOC_OFFSET-1)) + +/* + * We need some free virtual space to be able to do vmalloc. + * VMALLOC_MIN_SIZE defines the minimum size of the vmalloc + * area. On a machine with 2GB memory we make sure that we + * have at least 128MB free space for vmalloc. On a machine + * with 4TB we make sure we have at least 1GB. + */ #ifndef __s390x__ -# define VMALLOC_END (0x7fffffffL) +#define VMALLOC_MIN_SIZE 0x8000000UL +#define VMALLOC_END 0x80000000UL #else /* __s390x__ */ -# define VMALLOC_END (0x40000000000L) +#define VMALLOC_MIN_SIZE 0x40000000UL +#define VMALLOC_END 0x40000000000UL #endif /* __s390x__ */ - /* * A 31 bit pagetable entry of S390 has following format: * | PFRA | | OS | diff --git a/include/asm-s390/sections.h b/include/asm-s390/sections.h index 3a0b8ff..1c5a2c4 100644 --- a/include/asm-s390/sections.h +++ b/include/asm-s390/sections.h @@ -3,4 +3,6 @@ #include <asm-generic/sections.h> +extern char _eshared[]; + #endif diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index c1d0b37..c1f5e92 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -2,7 +2,7 @@ * include/asm-s390/setup.h * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999,2006 */ #ifndef _ASM_S390_SETUP_H @@ -33,6 +33,17 @@ extern unsigned long real_memory_size; +#define CHUNK_READ_WRITE 0 +#define CHUNK_READ_ONLY 1 + +struct mem_chunk { + unsigned long addr; + unsigned long size; + unsigned long type; +}; + +extern struct mem_chunk memory_chunk[]; + /* * Machine features detected in head.S */ @@ -58,7 +69,6 @@ extern unsigned long machine_flags; #define MACHINE_HAS_CPAGE (machine_flags & 2048) #endif /* __s390x__ */ - #define MACHINE_HAS_SCLP (!MACHINE_IS_P390) #define ZFCPDUMP_HSA_SIZE (32UL<<20) @@ -83,7 +93,11 @@ extern unsigned int console_irq; extern u32 ipl_parameter_flags; extern u16 ipl_devno; -void do_reipl(void); +extern void do_reipl(void); + +#define NSS_NAME_SIZE 8 + +extern char kernel_nss_name[]; #else /* __ASSEMBLY__ */