Skip to content
Snippets Groups Projects
  1. Jul 30, 2018
  2. Jun 12, 2018
    • Kees Cook's avatar
      treewide: kmalloc() -> kmalloc_array() · 6da2ec56
      Kees Cook authored
      
      The kmalloc() function has a 2-factor argument form, kmalloc_array(). This
      patch replaces cases of:
      
              kmalloc(a * b, gfp)
      
      with:
              kmalloc_array(a * b, gfp)
      
      as well as handling cases of:
      
              kmalloc(a * b * c, gfp)
      
      with:
      
              kmalloc(array3_size(a, b, c), gfp)
      
      as it's slightly less ugly than:
      
              kmalloc_array(array_size(a, b), c, gfp)
      
      This does, however, attempt to ignore constant size factors like:
      
              kmalloc(4 * 1024, gfp)
      
      though any constants defined via macros get caught up in the conversion.
      
      Any factors with a sizeof() of "unsigned char", "char", and "u8" were
      dropped, since they're redundant.
      
      The tools/ directory was manually excluded, since it has its own
      implementation of kmalloc().
      
      The Coccinelle script used for this was:
      
      // Fix redundant parens around sizeof().
      @@
      type TYPE;
      expression THING, E;
      @@
      
      (
        kmalloc(
      -	(sizeof(TYPE)) * E
      +	sizeof(TYPE) * E
        , ...)
      |
        kmalloc(
      -	(sizeof(THING)) * E
      +	sizeof(THING) * E
        , ...)
      )
      
      // Drop single-byte sizes and redundant parens.
      @@
      expression COUNT;
      typedef u8;
      typedef __u8;
      @@
      
      (
        kmalloc(
      -	sizeof(u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(__u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(char) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(unsigned char) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(u8) * COUNT
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(__u8) * COUNT
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(char) * COUNT
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(unsigned char) * COUNT
      +	COUNT
        , ...)
      )
      
      // 2-factor product with sizeof(type/expression) and identifier or constant.
      @@
      type TYPE;
      expression THING;
      identifier COUNT_ID;
      constant COUNT_CONST;
      @@
      
      (
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * (COUNT_ID)
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * COUNT_ID
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * COUNT_CONST
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * (COUNT_ID)
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * COUNT_ID
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * COUNT_CONST
      +	COUNT_CONST, sizeof(THING)
        , ...)
      )
      
      // 2-factor product, only identifiers.
      @@
      identifier SIZE, COUNT;
      @@
      
      - kmalloc
      + kmalloc_array
        (
      -	SIZE * COUNT
      +	COUNT, SIZE
        , ...)
      
      // 3-factor product with 1 sizeof(type) or sizeof(expression), with
      // redundant parens removed.
      @@
      expression THING;
      identifier STRIDE, COUNT;
      type TYPE;
      @@
      
      (
        kmalloc(
      -	sizeof(TYPE) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      )
      
      // 3-factor product with 2 sizeof(variable), with redundant parens removed.
      @@
      expression THING1, THING2;
      identifier COUNT;
      type TYPE1, TYPE2;
      @@
      
      (
        kmalloc(
      -	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kmalloc(
      -	sizeof(THING1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kmalloc(
      -	sizeof(THING1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      )
      
      // 3-factor product, only identifiers, with redundant parens removed.
      @@
      identifier STRIDE, SIZE, COUNT;
      @@
      
      (
        kmalloc(
      -	(COUNT) * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	(COUNT) * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	(COUNT) * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	(COUNT) * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      )
      
      // Any remaining multi-factor products, first at least 3-factor products,
      // when they're not all constants...
      @@
      expression E1, E2, E3;
      constant C1, C2, C3;
      @@
      
      (
        kmalloc(C1 * C2 * C3, ...)
      |
        kmalloc(
      -	(E1) * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kmalloc(
      -	(E1) * (E2) * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kmalloc(
      -	(E1) * (E2) * (E3)
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kmalloc(
      -	E1 * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      )
      
      // And then all remaining 2 factors products when they're not all constants,
      // keeping sizeof() as the second factor argument.
      @@
      expression THING, E1, E2;
      type TYPE;
      constant C1, C2, C3;
      @@
      
      (
        kmalloc(sizeof(THING) * C2, ...)
      |
        kmalloc(sizeof(TYPE) * C2, ...)
      |
        kmalloc(C1 * C2 * C3, ...)
      |
        kmalloc(C1 * C2, ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * (E2)
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * E2
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * (E2)
      +	E2, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * E2
      +	E2, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	(E1) * E2
      +	E1, E2
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	(E1) * (E2)
      +	E1, E2
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	E1 * E2
      +	E1, E2
        , ...)
      )
      
      Signed-off-by: default avatarKees Cook <keescook@chromium.org>
      6da2ec56
  3. Jun 08, 2018
    • Matthew Wilcox's avatar
      s390: use _refcount for pgtables · 620b4e90
      Matthew Wilcox authored
      Patch series "Rearrange struct page", v6.
      
      As presented at LSFMM, this patch-set rearranges struct page to give
      more contiguous usable space to users who have allocated a struct page
      for their own purposes.  For a graphical view of before-and-after, see
      the first two tabs of
      
        https://docs.google.com/spreadsheets/d/1tvCszs_7FXrjei9_mtFiKV6nW1FLnYyvPvW-qNZhdog/edit?usp=sharing
      
      Highlights:
       - deferred_list now really exists in struct page instead of just a comment.
       - hmm_data also exists in struct page instead of being a nasty hack.
       - x86's PGD pages have a real pointer to the mm_struct.
       - VMalloc pages now have all sorts of extra information stored in them
         to help with debugging and tuning.
       - rcu_head is no longer tied to slab in case anyone else wants to
         free pages by RCU.
       - slub's counters no longer share space with _refcount.
       - slub's freelist+counters are now naturally dword aligned.
       - slub loses a parameter to a lot of functions and a sysfs file.
      
      This patch (of 17):
      
      s390 borrows the storage used for _mapcount in struct page in order to
      account whether the bottom or top half is being used for 2kB page tables.
      I want to use that for something else, so use the top byte of _refcount
      instead of the bottom byte of _mapcount.  _refcount may temporarily be
      incremented by other CPUs that see a stale pointer to this page in the
      page cache, but each CPU can only increment it by one, and there are no
      systems with 2^24 CPUs today, so they will not change the upper byte of
      _refcount.  We do have to be a little careful not to lose any of their
      writes (as they will subsequently decrement the counter).
      
      Link: http://lkml.kernel.org/r/20180518194519.3820-2-willy@infradead.org
      
      
      Signed-off-by: default avatarMatthew Wilcox <mawilcox@microsoft.com>
      Acked-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
      Cc: Christoph Lameter <cl@linux.com>
      Cc: Lai Jiangshan <jiangshanlai@gmail.com>
      Cc: Pekka Enberg <penberg@kernel.org>
      Cc: Vlastimil Babka <vbabka@suse.cz>
      Cc: Dave Hansen <dave.hansen@linux.intel.com>
      Cc: Jérôme Glisse <jglisse@redhat.com>
      Cc: Randy Dunlap <rdunlap@infradead.org>
      Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
      Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
      620b4e90
  4. May 17, 2018
  5. Apr 25, 2018
    • Eric W. Biederman's avatar
      signal/s390: Use force_sig_fault where appropriate · 9507a5d0
      Eric W. Biederman authored
      
      Filling in struct siginfo before calling force_sig_info a tedious and
      error prone process, where once in a great while the wrong fields
      are filled out, and siginfo has been inconsistently cleared.
      
      Simplify this process by using the helper force_sig_fault.  Which
      takes as a parameters all of the information it needs, ensures
      all of the fiddly bits of filling in struct siginfo are done properly
      and then calls force_sig_info.
      
      In short about a 5 line reduction in code for every time force_sig_info
      is called, which makes the calling function clearer.
      
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: linux-s390@vger.kernel.org
      Acked-by: default avatarMartin Schwidefsky <&gt;schwidefsky@de.ibm.com>
      Signed-off-by: default avatar"Eric W. Biederman" <ebiederm@xmission.com>
      9507a5d0
    • Eric W. Biederman's avatar
      signal: Ensure every siginfo we send has all bits initialized · 3eb0f519
      Eric W. Biederman authored
      
      Call clear_siginfo to ensure every stack allocated siginfo is properly
      initialized before being passed to the signal sending functions.
      
      Note: It is not safe to depend on C initializers to initialize struct
      siginfo on the stack because C is allowed to skip holes when
      initializing a structure.
      
      The initialization of struct siginfo in tracehook_report_syscall_exit
      was moved from the helper user_single_step_siginfo into
      tracehook_report_syscall_exit itself, to make it clear that the local
      variable siginfo gets fully initialized.
      
      In a few cases the scope of struct siginfo has been reduced to make it
      clear that siginfo siginfo is not used on other paths in the function
      in which it is declared.
      
      Instances of using memset to initialize siginfo have been replaced
      with calls clear_siginfo for clarity.
      
      Signed-off-by: default avatar"Eric W. Biederman" <ebiederm@xmission.com>
      3eb0f519
  6. Apr 14, 2018
  7. Apr 11, 2018
    • Kees Cook's avatar
      exec: pass stack rlimit into mm layout functions · 8f2af155
      Kees Cook authored
      Patch series "exec: Pin stack limit during exec".
      
      Attempts to solve problems with the stack limit changing during exec
      continue to be frustrated[1][2].  In addition to the specific issues
      around the Stack Clash family of flaws, Andy Lutomirski pointed out[3]
      other places during exec where the stack limit is used and is assumed to
      be unchanging.  Given the many places it gets used and the fact that it
      can be manipulated/raced via setrlimit() and prlimit(), I think the only
      way to handle this is to move away from the "current" view of the stack
      limit and instead attach it to the bprm, and plumb this down into the
      functions that need to know the stack limits.  This series implements
      the approach.
      
      [1] 04e35f44 ("exec: avoid RLIMIT_STACK races with prlimit()")
      [2] 779f4e1c ("Revert "exec: avoid RLIMIT_STACK races with prlimit()"")
      [3] to security@kernel.org, "Subject: existing rlimit races?"
      
      This patch (of 3):
      
      Since it is possible that the stack rlimit can change externally during
      exec (either via another thread calling setrlimit() or another process
      calling prlimit()), provide a way to pass the rlimit down into the
      per-architecture mm layout functions so that the rlimit can stay in the
      bprm structure instead of sitting in the signal structure until exec is
      finalized.
      
      Link: http://lkml.kernel.org/r/1518638796-20819-2-git-send-email-keescook@chromium.org
      
      
      Signed-off-by: default avatarKees Cook <keescook@chromium.org>
      Cc: Michal Hocko <mhocko@kernel.org>
      Cc: Ben Hutchings <ben@decadent.org.uk>
      Cc: Willy Tarreau <w@1wt.eu>
      Cc: Hugh Dickins <hughd@google.com>
      Cc: Oleg Nesterov <oleg@redhat.com>
      Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
      Cc: Rik van Riel <riel@redhat.com>
      Cc: Laura Abbott <labbott@redhat.com>
      Cc: Greg KH <greg@kroah.com>
      Cc: Andy Lutomirski <luto@kernel.org>
      Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
      Cc: Brad Spengler <spender@grsecurity.net>
      Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
      Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
      8f2af155
  8. Feb 27, 2018
  9. Feb 22, 2018
  10. Jan 24, 2018
  11. Jan 23, 2018
  12. Jan 16, 2018
    • David Hildenbrand's avatar
      s390x/mm: cleanup gmap_pte_op_walk() · 96965941
      David Hildenbrand authored
      
      gmap_mprotect_notify() refuses shadow gmaps. Turns out that
      a) gmap_protect_range()
      b) gmap_read_table()
      c) gmap_pte_op_walk()
      
      Are never called for gmap shadows. And never should be. This dates back
      to gmap shadow prototypes where we allowed to call mprotect_notify() on
      the gmap shadow (to get notified about the prefix pages getting removed).
      This is avoided by always getting notified about any change on the gmap
      shadow.
      
      The only real function for walking page tables on shadow gmaps is
      gmap_table_walk().
      
      So, essentially, these functions should never get called and
      gmap_pte_op_walk() can be cleaned up. Add some checks to callers of
      gmap_pte_op_walk().
      
      Signed-off-by: default avatarDavid Hildenbrand <david@redhat.com>
      Message-Id: <20171110151805.7541-1-david@redhat.com>
      Reviewed-by: default avatarJanosch Frank <frankja@linux.vnet.ibm.com>
      Acked-by: default avatarCornelia Huck <cohuck@redhat.com>
      Signed-off-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
      96965941
  13. Jan 08, 2018
  14. Dec 05, 2017
  15. Nov 24, 2017
    • Greg Kroah-Hartman's avatar
      s390: Remove redundant license text · fec37202
      Greg Kroah-Hartman authored
      
      Now that the SPDX tag is in all arch/s390/ files, that identifies the
      license in a specific and legally-defined manner.  So the extra GPL text
      wording in the remaining files can be removed as it is no longer needed
      at all.
      
      This is done on a quest to remove the 700+ different ways that files in
      the kernel describe the GPL license text.  And there's unneeded stuff
      like the address (sometimes incorrect) for the FSF which is never
      needed.
      
      No copyright headers or other non-license-description text was removed.
      
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      Signed-off-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
      fec37202
    • Greg Kroah-Hartman's avatar
      s390: mm: add SPDX identifiers to the remaining files · ac41aaee
      Greg Kroah-Hartman authored
      
      It's good to have SPDX identifiers in all files to make it easier to
      audit the kernel tree for correct licenses.
      
      Update the arch/s390/mm/ files with the correct SPDX license
      identifier based on the license text in the file itself.  The SPDX
      identifier is a legally binding shorthand, which can be used instead of
      the full boiler plate text.
      
      This work is based on a script and data from Thomas Gleixner, Philippe
      Ombredanne, and Kate Stewart.
      
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: Christian Borntraeger <borntraeger@de.ibm.com>
      Cc: Cornelia Huck <cohuck@redhat.com>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Kate Stewart <kstewart@linuxfoundation.org>
      Cc: Philippe Ombredanne <pombredanne@nexb.com>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      Signed-off-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
      ac41aaee
  16. Nov 21, 2017
    • Kees Cook's avatar
      s390: cmm: Convert timers to use timer_setup() · 6cc73a06
      Kees Cook authored
      
      In preparation for unconditionally passing the struct timer_list pointer to
      all timer callbacks, switch to using the new timer_setup() and from_timer()
      to pass the timer pointer explicitly.
      
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: Thomas Gleixner <tglx@linutronix.de>
      Cc: Geert Uytterhoeven <geert@linux-m68k.org>
      Cc: Arnd Bergmann <arnd@arndb.de>
      Cc: Guenter Roeck <linux@roeck-us.net>
      Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
      Cc: linux-s390@vger.kernel.org
      Signed-off-by: default avatarKees Cook <keescook@chromium.org>
      6cc73a06
  17. Nov 16, 2017
    • Hendrik Brueckner's avatar
      s390/cpum_sf: correctly set the PID and TID in perf samples · 544e8dd7
      Hendrik Brueckner authored
      
      The hardware sampler creates samples that are processed at a later
      point in time.  The PID and TID values of the perf samples that are
      created for hardware samples are initialized with values from the
      current task.  Hence, the PID and TID values are not correct and
      perf samples are associated with wrong processes.
      
      The PID and TID values are obtained from the Host Program Parameter
      (HPP) field in the basic-sampling data entries.  These PIDs are
      valid in the init PID namespace.  Ensure that the PIDs in the perf
      samples are resolved considering the PID namespace in which the
      perf event was created.
      
      To correct the PID and TID values in the created perf samples,
      a special overflow handler is installed.  It replaces the default
      overflow handler and does not become effective if any other
      overflow handler is used.  With the special overflow handler most
      of the perf samples are associated with the right processes.
      For processes, that are no longer exist, the association might
      still be wrong.
      
      Signed-off-by: default avatarHendrik Brueckner <brueckner@linux.vnet.ibm.com>
      Signed-off-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
      544e8dd7
  18. Nov 14, 2017
    • Heiko Carstens's avatar
      s390/mm: remove unused code · 2be1da8d
      Heiko Carstens authored
      
      Signed-off-by: default avatarHeiko Carstens <heiko.carstens@de.ibm.com>
      2be1da8d
    • Martin Schwidefsky's avatar
      s390: remove all code using the access register mode · 0aaba41b
      Martin Schwidefsky authored
      
      The vdso code for the getcpu() and the clock_gettime() call use the access
      register mode to access the per-CPU vdso data page with the current code.
      
      An alternative to the complicated AR mode is to use the secondary space
      mode. This makes the vdso faster and quite a bit simpler. The downside is
      that the uaccess code has to be changed quite a bit.
      
      Which instructions are used depends on the machine and what kind of uaccess
      operation is requested. The instruction dictates which ASCE value needs
      to be loaded into %cr1 and %cr7.
      
      The different cases:
      
      * User copy with MVCOS for z10 and newer machines
        The MVCOS instruction can copy between the primary space (aka user) and
        the home space (aka kernel) directly. For set_fs(KERNEL_DS) the kernel
        ASCE is loaded into %cr1. For set_fs(USER_DS) the user space is already
        loaded in %cr1.
      
      * User copy with MVCP/MVCS for older machines
        To be able to execute the MVCP/MVCS instructions the kernel needs to
        switch to primary mode. The control register %cr1 has to be set to the
        kernel ASCE and %cr7 to either the kernel ASCE or the user ASCE dependent
        on set_fs(KERNEL_DS) vs set_fs(USER_DS).
      
      * Data access in the user address space for strnlen / futex
        To use "normal" instruction with data from the user address space the
        secondary space mode is used. The kernel needs to switch to primary mode,
        %cr1 has to contain the kernel ASCE and %cr7 either the user ASCE or the
        kernel ASCE, dependent on set_fs.
      
      To load a new value into %cr1 or %cr7 is an expensive operation, the kernel
      tries to be lazy about it. E.g. for multiple user copies in a row with
      MVCP/MVCS the replacement of the vdso ASCE in %cr7 with the user ASCE is
      done only once. On return to user space a CPU bit is checked that loads the
      vdso ASCE again.
      
      To enable and disable the data access via the secondary space two new
      functions are added, enable_sacf_uaccess and disable_sacf_uaccess. The fact
      that a context is in secondary space uaccess mode is stored in the
      mm_segment_t value for the task. The code of an interrupt may use set_fs
      as long as it returns to the previous state it got with get_fs with another
      call to set_fs. The code in finish_arch_post_lock_switch simply has to do a
      set_fs with the current mm_segment_t value for the task.
      
      For CPUs with MVCOS:
      
      CPU running in                        | %cr1 ASCE | %cr7 ASCE |
      --------------------------------------|-----------|-----------|
      user space                            |  user     |  vdso     |
      kernel, USER_DS, normal-mode          |  user     |  vdso     |
      kernel, USER_DS, normal-mode, lazy    |  user     |  user     |
      kernel, USER_DS, sacf-mode            |  kernel   |  user     |
      kernel, KERNEL_DS, normal-mode        |  kernel   |  vdso     |
      kernel, KERNEL_DS, normal-mode, lazy  |  kernel   |  kernel   |
      kernel, KERNEL_DS, sacf-mode          |  kernel   |  kernel   |
      
      For CPUs without MVCOS:
      
      CPU running in                        | %cr1 ASCE | %cr7 ASCE |
      --------------------------------------|-----------|-----------|
      user space                            |  user     |  vdso     |
      kernel, USER_DS, normal-mode          |  user     |  vdso     |
      kernel, USER_DS, normal-mode lazy     |  kernel   |  user     |
      kernel, USER_DS, sacf-mode            |  kernel   |  user     |
      kernel, KERNEL_DS, normal-mode        |  kernel   |  vdso     |
      kernel, KERNEL_DS, normal-mode, lazy  |  kernel   |  kernel   |
      kernel, KERNEL_DS, sacf-mode          |  kernel   |  kernel   |
      
      The lines with "lazy" refer to the state after a copy via the secondary
      space with a delayed reload of %cr1 and %cr7.
      
      There are three hardware address spaces that can cause a DAT exception,
      primary, secondary and home space. The exception can be related to
      four different fault types: user space fault, vdso fault, kernel fault,
      and the gmap faults.
      
      Dependent on the set_fs state and normal vs. sacf mode there are a number
      of fault combinations:
      
      1) user address space fault via the primary ASCE
      2) gmap address space fault via the primary ASCE
      3) kernel address space fault via the primary ASCE for machines with
         MVCOS and set_fs(KERNEL_DS)
      4) vdso address space faults via the secondary ASCE with an invalid
         address while running in secondary space in problem state
      5) user address space fault via the secondary ASCE for user-copy
         based on the secondary space mode, e.g. futex_ops or strnlen_user
      6) kernel address space fault via the secondary ASCE for user-copy
         with secondary space mode with set_fs(KERNEL_DS)
      7) kernel address space fault via the primary ASCE for user-copy
         with secondary space mode with set_fs(USER_DS) on machines without
         MVCOS.
      8) kernel address space fault via the home space ASCE
      
      Replace user_space_fault() with a new function get_fault_type() that
      can distinguish all four different fault types.
      
      With these changes the futex atomic ops from the kernel and the
      strnlen_user will get a little bit slower, as well as the old style
      uaccess with MVCP/MVCS. All user accesses based on MVCOS will be as
      fast as before. On the positive side, the user space vdso code is a
      lot faster and Linux ceases to use the complicated AR mode.
      
      Reviewed-by: default avatarHeiko Carstens <heiko.carstens@de.ibm.com>
      Signed-off-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
      Signed-off-by: default avatarHeiko Carstens <heiko.carstens@de.ibm.com>
      0aaba41b
    • Martin Schwidefsky's avatar
      s390/mm,kvm: improve detection of KVM guest faults · c771320e
      Martin Schwidefsky authored
      
      The identification of guest fault currently relies on the PF_VCPU flag.
      This is set in guest_entry_irqoff and cleared in guest_exit_irqoff.
      Both functions are called by __vcpu_run, the PF_VCPU flag is set for
      quite a lot of kernel code outside of the guest execution.
      
      Replace the PF_VCPU scheme with the PIF_GUEST_FAULT in the pt_regs and
      make the program check handler code in entry.S set the bit only for
      exception that occurred between the .Lsie_gmap and .Lsie_done labels.
      
      Reviewed-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
      Signed-off-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
      Signed-off-by: default avatarHeiko Carstens <heiko.carstens@de.ibm.com>
      c771320e
  19. Nov 08, 2017
  20. Nov 02, 2017
    • Greg Kroah-Hartman's avatar
      License cleanup: add SPDX GPL-2.0 license identifier to files with no license · b2441318
      Greg Kroah-Hartman authored
      
      Many source files in the tree are missing licensing information, which
      makes it harder for compliance tools to determine the correct license.
      
      By default all files without license information are under the default
      license of the kernel, which is GPL version 2.
      
      Update the files which contain no license information with the 'GPL-2.0'
      SPDX license identifier.  The SPDX identifier is a legally binding
      shorthand, which can be used instead of the full boiler plate text.
      
      This patch is based on work done by Thomas Gleixner and Kate Stewart and
      Philippe Ombredanne.
      
      How this work was done:
      
      Patches were generated and checked against linux-4.14-rc6 for a subset of
      the use cases:
       - file had no licensing information it it.
       - file was a */uapi/* one with no licensing information in it,
       - file was a */uapi/* one with existing licensing information,
      
      Further patches will be generated in subsequent months to fix up cases
      where non-standard license headers were used, and references to license
      had to be inferred by heuristics based on keywords.
      
      The analysis to determine which SPDX License Identifier to be applied to
      a file was done in a spreadsheet of side by side results from of the
      output of two independent scanners (ScanCode & Windriver) producing SPDX
      tag:value files created by Philippe Ombredanne.  Philippe prepared the
      base worksheet, and did an initial spot review of a few 1000 files.
      
      The 4.13 kernel was the starting point of the analysis with 60,537 files
      assessed.  Kate Stewart did a file by file comparison of the scanner
      results in the spreadsheet to determine which SPDX license identifier(s)
      to be applied to the file. She confirmed any determination that was not
      immediately clear with lawyers working with the Linux Foundation.
      
      Criteria used to select files for SPDX license identifier tagging was:
       - Files considered eligible had to be source code files.
       - Make and config files were included as candidates if they contained >5
         lines of source
       - File already had some variant of a license header in it (even if <5
         lines).
      
      All documentation files were explicitly excluded.
      
      The following heuristics were used to determine which SPDX license
      identifiers to apply.
      
       - when both scanners couldn't find any license traces, file was
         considered to have no license information in it, and the top level
         COPYING file license applied.
      
         For non */uapi/* files that summary was:
      
         SPDX license identifier                            # files
         ---------------------------------------------------|-------
         GPL-2.0                                              11139
      
         and resulted in the first patch in this series.
      
         If that file was a */uapi/* path one, it was "GPL-2.0 WITH
         Linux-syscall-note" otherwise it was "GPL-2.0".  Results of that was:
      
         SPDX license identifier                            # files
         ---------------------------------------------------|-------
         GPL-2.0 WITH Linux-syscall-note                        930
      
         and resulted in the second patch in this series.
      
       - if a file had some form of licensing information in it, and was one
         of the */uapi/* ones, it was denoted with the Linux-syscall-note if
         any GPL family license was found in the file or had no licensing in
         it (per prior point).  Results summary:
      
         SPDX license identifier                            # files
         ---------------------------------------------------|------
         GPL-2.0 WITH Linux-syscall-note                       270
         GPL-2.0+ WITH Linux-syscall-note                      169
         ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause)    21
         ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)    17
         LGPL-2.1+ WITH Linux-syscall-note                      15
         GPL-1.0+ WITH Linux-syscall-note                       14
         ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause)    5
         LGPL-2.0+ WITH Linux-syscall-note                       4
         LGPL-2.1 WITH Linux-syscall-note                        3
         ((GPL-2.0 WITH Linux-syscall-note) OR MIT)              3
         ((GPL-2.0 WITH Linux-syscall-note) AND MIT)             1
      
         and that resulted in the third patch in this series.
      
       - when the two scanners agreed on the detected license(s), that became
         the concluded license(s).
      
       - when there was disagreement between the two scanners (one detected a
         license but the other didn't, or they both detected different
         licenses) a manual inspection of the file occurred.
      
       - In most cases a manual inspection of the information in the file
         resulted in a clear resolution of the license that should apply (and
         which scanner probably needed to revisit its heuristics).
      
       - When it was not immediately clear, the license identifier was
         confirmed with lawyers working with the Linux Foundation.
      
       - If there was any question as to the appropriate license identifier,
         the file was flagged for further research and to be revisited later
         in time.
      
      In total, over 70 hours of logged manual review was done on the
      spreadsheet to determine the SPDX license identifiers to apply to the
      source files by Kate, Philippe, Thomas and, in some cases, confirmation
      by lawyers working with the Linux Foundation.
      
      Kate also obtained a third independent scan of the 4.13 code base from
      FOSSology, and compared selected files where the other two scanners
      disagreed against that SPDX file, to see if there was new insights.  The
      Windriver scanner is based on an older version of FOSSology in part, so
      they are related.
      
      Thomas did random spot checks in about 500 files from the spreadsheets
      for the uapi headers and agreed with SPDX license identifier in the
      files he inspected. For the non-uapi files Thomas did random spot checks
      in about 15000 files.
      
      In initial set of patches against 4.14-rc6, 3 files were found to have
      copy/paste license identifier errors, and have been fixed to reflect the
      correct identifier.
      
      Additionally Philippe spent 10 hours this week doing a detailed manual
      inspection and review of the 12,461 patched files from the initial patch
      version early this week with:
       - a full scancode scan run, collecting the matched texts, detected
         license ids and scores
       - reviewing anything where there was a license detected (about 500+
         files) to ensure that the applied SPDX license was correct
       - reviewing anything where there was no detection but the patch license
         was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied
         SPDX license was correct
      
      This produced a worksheet with 20 files needing minor correction.  This
      worksheet was then exported into 3 different .csv files for the
      different types of files to be modified.
      
      These .csv files were then reviewed by Greg.  Thomas wrote a script to
      parse the csv files and add the proper SPDX tag to the file, in the
      format that the file expected.  This script was further refined by Greg
      based on the output to detect more types of files automatically and to
      distinguish between header and source .c files (which need different
      comment types.)  Finally Greg ran the script using the .csv files to
      generate the patches.
      
      Reviewed-by: default avatarKate Stewart <kstewart@linuxfoundation.org>
      Reviewed-by: default avatarPhilippe Ombredanne <pombredanne@nexb.com>
      Reviewed-by: default avatarThomas Gleixner <tglx@linutronix.de>
      Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      b2441318
  21. Oct 09, 2017
  22. Oct 05, 2017
    • Kees Cook's avatar
      timer: Remove expires and data arguments from DEFINE_TIMER · 1d27e3e2
      Kees Cook authored
      
      Drop the arguments from the macro and adjust all callers with the
      following script:
      
        perl -pi -e 's/DEFINE_TIMER\((.*), 0, 0\);/DEFINE_TIMER($1);/g;' \
          $(git grep DEFINE_TIMER | cut -d: -f1 | sort -u | grep -v timer.h)
      
      Signed-off-by: default avatarKees Cook <keescook@chromium.org>
      Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> # for m68k parts
      Acked-by: Guenter Roeck <linux@roeck-us.net> # for watchdog parts
      Acked-by: David S. Miller <davem@davemloft.net> # for networking parts
      Acked-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
      Acked-by: Kalle Valo <kvalo@codeaurora.org> # for wireless parts
      Acked-by: default avatarArnd Bergmann <arnd@arndb.de>
      Cc: linux-mips@linux-mips.org
      Cc: Petr Mladek <pmladek@suse.com>
      Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      Cc: Lai Jiangshan <jiangshanlai@gmail.com>
      Cc: Sebastian Reichel <sre@kernel.org>
      Cc: Kalle Valo <kvalo@qca.qualcomm.com>
      Cc: Paul Mackerras <paulus@samba.org>
      Cc: Pavel Machek <pavel@ucw.cz>
      Cc: linux1394-devel@lists.sourceforge.net
      Cc: Chris Metcalf <cmetcalf@mellanox.com>
      Cc: linux-s390@vger.kernel.org
      Cc: linux-wireless@vger.kernel.org
      Cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com>
      Cc: Wim Van Sebroeck <wim@iguana.be>
      Cc: Michael Ellerman <mpe@ellerman.id.au>
      Cc: Ursula Braun <ubraun@linux.vnet.ibm.com>
      Cc: Viresh Kumar <viresh.kumar@linaro.org>
      Cc: Harish Patil <harish.patil@cavium.com>
      Cc: Stephen Boyd <sboyd@codeaurora.org>
      Cc: Michael Reed <mdr@sgi.com>
      Cc: Manish Chopra <manish.chopra@cavium.com>
      Cc: Len Brown <len.brown@intel.com>
      Cc: Arnd Bergmann <arnd@arndb.de>
      Cc: linux-pm@vger.kernel.org
      Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
      Cc: Tejun Heo <tj@kernel.org>
      Cc: Julian Wiedmann <jwi@linux.vnet.ibm.com>
      Cc: John Stultz <john.stultz@linaro.org>
      Cc: Mark Gross <mark.gross@intel.com>
      Cc: linux-watchdog@vger.kernel.org
      Cc: linux-scsi@vger.kernel.org
      Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
      Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
      Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
      Cc: Oleg Nesterov <oleg@redhat.com>
      Cc: Ralf Baechle <ralf@linux-mips.org>
      Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
      Cc: Guenter Roeck <linux@roeck-us.net>
      Cc: netdev@vger.kernel.org
      Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
      Cc: Andrew Morton <akpm@linux-foundation.org>
      Cc: linuxppc-dev@lists.ozlabs.org
      Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
      Link: https://lkml.kernel.org/r/1507159627-127660-11-git-send-email-keescook@chromium.org
      
      
      Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
      1d27e3e2
  23. Sep 19, 2017
  24. Sep 06, 2017
  25. Aug 31, 2017
  26. Aug 29, 2017
    • Christian Borntraeger's avatar
      s390/mm: avoid empty zero pages for KVM guests to avoid postcopy hangs · fa41ba0d
      Christian Borntraeger authored
      
      Right now there is a potential hang situation for postcopy migrations,
      if the guest is enabling storage keys on the target system during the
      postcopy process.
      
      For storage key virtualization, we have to forbid the empty zero page as
      the storage key is a property of the physical page frame.  As we enable
      storage key handling lazily we then drop all mappings for empty zero
      pages for lazy refaulting later on.
      
      This does not work with the postcopy migration, which relies on the
      empty zero page never triggering a fault again in the future. The reason
      is that postcopy migration will simply read a page on the target system
      if that page is a known zero page to fault in an empty zero page.  At
      the same time postcopy remembers that this page was already transferred
      - so any future userfault on that page will NOT be retransmitted again
      to avoid races.
      
      If now the guest enters the storage key mode while in postcopy, we will
      break this assumption of postcopy.
      
      The solution is to disable the empty zero page for KVM guests early on
      and not during storage key enablement. With this change, the postcopy
      migration process is guaranteed to start after no zero pages are left.
      
      As guest pages are very likely not empty zero pages anyway the memory
      overhead is also pretty small.
      
      While at it this also adds proper page table locking to the zero page
      removal.
      
      Signed-off-by: default avatarChristian Borntraeger <borntraeger@de.ibm.com>
      Acked-by: default avatarJanosch Frank <frankja@linux.vnet.ibm.com>
      Cc: stable@vger.kernel.org
      Signed-off-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
      fa41ba0d
Loading