MIPS: sync I/O related header files with linux-4.4

Mainly sync asm/io.h to get a working ioremap() implementation
as well as the full set of I/O accessors. Pull in additional
header files to make this work.

Furthermore port over the directory 'arch/mips/include/asm/mach-generic/'
with contains default definitions for I/O and memory spaces and default
implementations for mapping those spaces. All files in that directory
can be overwritten by a SoC/machine.

Signed-off-by: Daniel Schwierzeck <daniel.schwierzeck@gmail.com>
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index a7ab087..eb06afa 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -1,21 +1,28 @@
 /*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
  * Copyright (C) 1994, 1995 Waldorf GmbH
- * Copyright (C) 1994 - 2000 Ralf Baechle
+ * Copyright (C) 1994 - 2000, 06 Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
- * Copyright (C) 2000 FSMLabs, Inc.
+ * Copyright (C) 2004, 2005  MIPS Technologies, Inc.  All rights reserved.
+ *	Author: Maciej W. Rozycki <macro@mips.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0
  */
 #ifndef _ASM_IO_H
 #define _ASM_IO_H
 
-#if 0
-#include <linux/pagemap.h>
-#endif
+#include <linux/compiler.h>
+#include <linux/types.h>
+
 #include <asm/addrspace.h>
 #include <asm/byteorder.h>
+#include <asm/cpu-features.h>
+#include <asm/pgtable-bits.h>
+#include <asm/processor.h>
+#include <asm/string.h>
+
+#include <ioremap.h>
+#include <mangle-port.h>
+#include <spaces.h>
 
 /*
  * Slowdown I/O port space accesses for antique hardware.
@@ -23,44 +30,20 @@
 #undef CONF_SLOWDOWN_IO
 
 /*
- * Sane hardware offers swapping of I/O space accesses in hardware; less
- * sane hardware forces software to fiddle with this ...
+ * Raw operations are never swapped in software.  OTOH values that raw
+ * operations are working on may or may not have been swapped by the bus
+ * hardware.  An example use would be for flash memory that's used for
+ * execute in place.
  */
-#if defined(CONFIG_SWAP_IO_SPACE) && defined(__MIPSEB__)
+# define __raw_ioswabb(a, x)	(x)
+# define __raw_ioswabw(a, x)	(x)
+# define __raw_ioswabl(a, x)	(x)
+# define __raw_ioswabq(a, x)	(x)
+# define ____raw_ioswabq(a, x)	(x)
 
-#define __ioswab8(x) (x)
-#define __ioswab16(x) swab16(x)
-#define __ioswab32(x) swab32(x)
+/* ioswab[bwlq], __mem_ioswab[bwlq] are defined in mangle-port.h */
 
-#else
-
-#define __ioswab8(x) (x)
-#define __ioswab16(x) (x)
-#define __ioswab32(x) (x)
-
-#endif
-
-/*
- * This file contains the definitions for the MIPS counterpart of the
- * x86 in/out instructions. This heap of macros and C results in much
- * better code than the approach of doing it in plain C.  The macros
- * result in code that is to fast for certain hardware.  On the other
- * side the performance of the string functions should be improved for
- * sake of certain devices like EIDE disks that do highspeed polled I/O.
- *
- *   Ralf
- *
- * This file contains the definitions for the x86 IO instructions
- * inb/inw/inl/outb/outw/outl and the "string versions" of the same
- * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
- * versions of the single-IO instructions (inb_p/inw_p/..).
- *
- * This file is not meant to be obfuscating: it's just complicated
- * to (a) handle it all in a way that makes gcc able to optimize it
- * as well as possible and (b) trying to avoid writing the same thing
- * over and over again with slight variations and possibly making a
- * mistake somewhere.
- */
+#define IO_SPACE_LIMIT 0xffff
 
 /*
  * On MIPS I/O ports are memory mapped, so we access them using normal
@@ -84,6 +67,7 @@
 static inline void set_io_port_base(unsigned long base)
 {
 	* (unsigned long *) &mips_io_port_base = base;
+	barrier();
 }
 
 /*
@@ -114,378 +98,429 @@
 #endif
 
 /*
- * Change virtual addresses to physical addresses and vv.
- * These are trivial on the 1:1 Linux/MIPS mapping
- */
-static inline phys_addr_t virt_to_phys(volatile void * address)
-{
-#ifndef CONFIG_64BIT
-	return CPHYSADDR(address);
-#else
-	return XPHYSADDR(address);
-#endif
-}
-
-static inline void * phys_to_virt(unsigned long address)
-{
-#ifndef CONFIG_64BIT
-	return (void *)KSEG0ADDR(address);
-#else
-	return (void *)CKSEG0ADDR(address);
-#endif
-}
-
-/*
- * IO bus memory addresses are also 1:1 with the physical address
- */
-static inline unsigned long virt_to_bus(volatile void * address)
-{
-#ifndef CONFIG_64BIT
-	return CPHYSADDR(address);
-#else
-	return XPHYSADDR(address);
-#endif
-}
-
-static inline void * bus_to_virt(unsigned long address)
-{
-#ifndef CONFIG_64BIT
-	return (void *)KSEG0ADDR(address);
-#else
-	return (void *)CKSEG0ADDR(address);
-#endif
-}
-
-/*
- * isa_slot_offset is the address where E(ISA) busaddress 0 is mapped
- * for the processor.
- */
-extern unsigned long isa_slot_offset;
-
-extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
-
-#if 0
-static inline void *ioremap(unsigned long offset, unsigned long size)
-{
-	return __ioremap(offset, size, _CACHE_UNCACHED);
-}
-
-static inline void *ioremap_nocache(unsigned long offset, unsigned long size)
-{
-	return __ioremap(offset, size, _CACHE_UNCACHED);
-}
-
-extern void iounmap(void *addr);
-#endif
-
-/*
- * XXX We need system specific versions of these to handle EISA address bits
- * 24-31 on SNI.
- * XXX more SNI hacks.
- */
-#define __raw_readb(addr) (*(volatile unsigned char *)(addr))
-#define __raw_readw(addr) (*(volatile unsigned short *)(addr))
-#define __raw_readl(addr) (*(volatile unsigned int *)(addr))
-#define readb(addr) __raw_readb((addr))
-#define readw(addr) __ioswab16(__raw_readw((addr)))
-#define readl(addr) __ioswab32(__raw_readl((addr)))
-
-#define __raw_writeb(b, addr) (*(volatile unsigned char *)(addr)) = (b)
-#define __raw_writew(b, addr) (*(volatile unsigned short *)(addr)) = (b)
-#define __raw_writel(b, addr) (*(volatile unsigned int *)(addr)) = (b)
-#define writeb(b, addr) __raw_writeb((b), (addr))
-#define writew(b, addr) __raw_writew(__ioswab16(b), (addr))
-#define writel(b, addr) __raw_writel(__ioswab32(b), (addr))
-
-#define memset_io(a,b,c)	memset((void *)(a),(b),(c))
-#define memcpy_fromio(a,b,c)	memcpy((a),(void *)(b),(c))
-#define memcpy_toio(a,b,c)	memcpy((void *)(a),(b),(c))
-
-/* END SNI HACKS ... */
-
-/*
- * ISA space is 'always mapped' on currently supported MIPS systems, no need
- * to explicitly ioremap() it. The fact that the ISA IO space is mapped
- * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
- * are physical addresses. The following constant pointer can be
- * used as the IO-area pointer (it can be iounmapped as well, so the
- * analogy with PCI is quite large):
- */
-#define __ISA_IO_base ((char *)(PAGE_OFFSET))
-
-#define isa_readb(a) readb(a)
-#define isa_readw(a) readw(a)
-#define isa_readl(a) readl(a)
-#define isa_writeb(b,a) writeb(b,a)
-#define isa_writew(w,a) writew(w,a)
-#define isa_writel(l,a) writel(l,a)
-
-#define isa_memset_io(a,b,c)     memset_io((a),(b),(c))
-#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),(b),(c))
-#define isa_memcpy_toio(a,b,c)   memcpy_toio((a),(b),(c))
-
-/*
- * We don't have csum_partial_copy_fromio() yet, so we cheat here and
- * just copy it. The net code will then do the checksum later.
- */
-#define eth_io_copy_and_sum(skb,src,len,unused) memcpy_fromio((skb)->data,(src),(len))
-#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(b),(c),(d))
-
-static inline int check_signature(unsigned long io_addr,
-				  const unsigned char *signature, int length)
-{
-	int retval = 0;
-	do {
-		if (readb(io_addr) != *signature)
-			goto out;
-		io_addr++;
-		signature++;
-		length--;
-	} while (length);
-	retval = 1;
-out:
-	return retval;
-}
-#define isa_check_signature(io, s, l) check_signature(i,s,l)
-
-/*
- * Talk about misusing macros..
- */
-
-#define __OUT1(s) \
-static inline void __out##s(unsigned int value, unsigned int port) {
-
-#define __OUT2(m) \
-__asm__ __volatile__ ("s" #m "\t%0,%1(%2)"
-
-#define __OUT(m,s,w) \
-__OUT1(s) __OUT2(m) : : "r" (__ioswab##w(value)), "i" (0), "r" (mips_io_port_base+port)); } \
-__OUT1(s##c) __OUT2(m) : : "r" (__ioswab##w(value)), "ir" (port), "r" (mips_io_port_base)); } \
-__OUT1(s##_p) __OUT2(m) : : "r" (__ioswab##w(value)), "i" (0), "r" (mips_io_port_base+port)); \
-	SLOW_DOWN_IO; } \
-__OUT1(s##c_p) __OUT2(m) : : "r" (__ioswab##w(value)), "ir" (port), "r" (mips_io_port_base)); \
-	SLOW_DOWN_IO; }
-
-#define __IN1(t,s) \
-static inline t __in##s(unsigned int port) { t _v;
-
-/*
- * Required nops will be inserted by the assembler
- */
-#define __IN2(m) \
-__asm__ __volatile__ ("l" #m "\t%0,%1(%2)"
-
-#define __IN(t,m,s,w) \
-__IN1(t,s) __IN2(m) : "=r" (_v) : "i" (0), "r" (mips_io_port_base+port)); return __ioswab##w(_v); } \
-__IN1(t,s##c) __IN2(m) : "=r" (_v) : "ir" (port), "r" (mips_io_port_base)); return __ioswab##w(_v); } \
-__IN1(t,s##_p) __IN2(m) : "=r" (_v) : "i" (0), "r" (mips_io_port_base+port)); SLOW_DOWN_IO; return __ioswab##w(_v); } \
-__IN1(t,s##c_p) __IN2(m) : "=r" (_v) : "ir" (port), "r" (mips_io_port_base)); SLOW_DOWN_IO; return __ioswab##w(_v); }
-
-#define __INS1(s) \
-static inline void __ins##s(unsigned int port, void * addr, unsigned long count) {
-
-#define __INS2(m) \
-if (count) \
-__asm__ __volatile__ ( \
-	".set\tnoreorder\n\t" \
-	".set\tnoat\n" \
-	"1:\tl" #m "\t$1,%4(%5)\n\t" \
-	"subu\t%1,1\n\t" \
-	"s" #m "\t$1,(%0)\n\t" \
-	"bne\t$0,%1,1b\n\t" \
-	"addiu\t%0,%6\n\t" \
-	".set\tat\n\t" \
-	".set\treorder"
-
-#define __INS(m,s,i) \
-__INS1(s) __INS2(m) \
-	: "=r" (addr), "=r" (count) \
-	: "0" (addr), "1" (count), "i" (0), \
-	  "r" (mips_io_port_base+port), "I" (i) \
-	: "$1");} \
-__INS1(s##c) __INS2(m) \
-	: "=r" (addr), "=r" (count) \
-	: "0" (addr), "1" (count), "ir" (port), \
-	  "r" (mips_io_port_base), "I" (i) \
-	: "$1");}
-
-#define __OUTS1(s) \
-static inline void __outs##s(unsigned int port, const void * addr, unsigned long count) {
-
-#define __OUTS2(m) \
-if (count) \
-__asm__ __volatile__ ( \
-	".set\tnoreorder\n\t" \
-	".set\tnoat\n" \
-	"1:\tl" #m "\t$1,(%0)\n\t" \
-	"subu\t%1,1\n\t" \
-	"s" #m "\t$1,%4(%5)\n\t" \
-	"bne\t$0,%1,1b\n\t" \
-	"addiu\t%0,%6\n\t" \
-	".set\tat\n\t" \
-	".set\treorder"
-
-#define __OUTS(m,s,i) \
-__OUTS1(s) __OUTS2(m) \
-	: "=r" (addr), "=r" (count) \
-	: "0" (addr), "1" (count), "i" (0), "r" (mips_io_port_base+port), "I" (i) \
-	: "$1");} \
-__OUTS1(s##c) __OUTS2(m) \
-	: "=r" (addr), "=r" (count) \
-	: "0" (addr), "1" (count), "ir" (port), "r" (mips_io_port_base), "I" (i) \
-	: "$1");}
-
-__IN(unsigned char,b,b,8)
-__IN(unsigned short,h,w,16)
-__IN(unsigned int,w,l,32)
-
-__OUT(b,b,8)
-__OUT(h,w,16)
-__OUT(w,l,32)
-
-__INS(b,b,1)
-__INS(h,w,2)
-__INS(w,l,4)
-
-__OUTS(b,b,1)
-__OUTS(h,w,2)
-__OUTS(w,l,4)
-
-
-/*
- * Note that due to the way __builtin_constant_p() works, you
- *  - can't use it inside an inline function (it will never be true)
- *  - you don't have to worry about side effects within the __builtin..
- */
-#define outb(val,port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__outbc((val),(port)) : \
-	__outb((val),(port)))
-
-#define inb(port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__inbc(port) : \
-	__inb(port))
-
-#define outb_p(val,port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__outbc_p((val),(port)) : \
-	__outb_p((val),(port)))
-
-#define inb_p(port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__inbc_p(port) : \
-	__inb_p(port))
-
-#define outw(val,port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__outwc((val),(port)) : \
-	__outw((val),(port)))
-
-#define inw(port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__inwc(port) : \
-	__inw(port))
-
-#define outw_p(val,port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__outwc_p((val),(port)) : \
-	__outw_p((val),(port)))
-
-#define inw_p(port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__inwc_p(port) : \
-	__inw_p(port))
-
-#define outl(val,port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__outlc((val),(port)) : \
-	__outl((val),(port)))
-
-#define inl(port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__inlc(port) : \
-	__inl(port))
-
-#define outl_p(val,port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__outlc_p((val),(port)) : \
-	__outl_p((val),(port)))
-
-#define inl_p(port) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__inlc_p(port) : \
-	__inl_p(port))
-
-
-#define outsb(port,addr,count) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__outsbc((port),(addr),(count)) : \
-	__outsb ((port),(addr),(count)))
-
-#define insb(port,addr,count) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__insbc((port),(addr),(count)) : \
-	__insb((port),(addr),(count)))
-
-#define outsw(port,addr,count) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__outswc((port),(addr),(count)) : \
-	__outsw ((port),(addr),(count)))
-
-#define insw(port,addr,count) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__inswc((port),(addr),(count)) : \
-	__insw((port),(addr),(count)))
-
-#define outsl(port,addr,count) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__outslc((port),(addr),(count)) : \
-	__outsl ((port),(addr),(count)))
-
-#define insl(port,addr,count) \
-((__builtin_constant_p((port)) && (port) < 32768) ? \
-	__inslc((port),(addr),(count)) : \
-	__insl((port),(addr),(count)))
-
-#define IO_SPACE_LIMIT 0xffff
-
-/*
- * The caches on some architectures aren't dma-coherent and have need to
- * handle this in software.  There are three types of operations that
- * can be applied to dma buffers.
+ *     virt_to_phys    -       map virtual addresses to physical
+ *     @address: address to remap
  *
- *  - dma_cache_wback_inv(start, size) makes caches and coherent by
- *    writing the content of the caches back to memory, if necessary.
- *    The function also invalidates the affected part of the caches as
- *    necessary before DMA transfers from outside to memory.
- *  - dma_cache_wback(start, size) makes caches and coherent by
- *    writing the content of the caches back to memory, if necessary.
- *    The function also invalidates the affected part of the caches as
- *    necessary before DMA transfers from outside to memory.
- *  - dma_cache_inv(start, size) invalidates the affected parts of the
- *    caches.  Dirty lines of the caches may be written back or simply
- *    be discarded.  This operation is necessary before dma operations
- *    to the memory.
+ *     The returned physical address is the physical (CPU) mapping for
+ *     the memory address given. It is only valid to use this function on
+ *     addresses directly mapped or allocated via kmalloc.
+ *
+ *     This function does not give bus mappings for DMA transfers. In
+ *     almost all conceivable cases a device driver should not be using
+ *     this function
  */
-extern void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size);
-extern void (*_dma_cache_wback)(unsigned long start, unsigned long size);
-extern void (*_dma_cache_inv)(unsigned long start, unsigned long size);
-
-#define dma_cache_wback_inv(start,size)	_dma_cache_wback_inv(start,size)
-#define dma_cache_wback(start,size)	_dma_cache_wback(start,size)
-#define dma_cache_inv(start,size)	_dma_cache_inv(start,size)
-
-static inline void sync(void)
+static inline unsigned long virt_to_phys(volatile const void *address)
 {
+	unsigned long addr = (unsigned long)address;
+
+	/* this corresponds to kernel implementation of __pa() */
+#ifdef CONFIG_64BIT
+	if (addr < CKSEG0)
+		return XPHYSADDR(addr);
+
+	return CPHYSADDR(addr);
+#else
+	return addr - PAGE_OFFSET + PHYS_OFFSET;
+#endif
 }
 
 /*
- * Given a physical address and a length, return a virtual address
- * that can be used to access the memory range with the caching
- * properties specified by "flags".
+ *     phys_to_virt    -       map physical address to virtual
+ *     @address: address to remap
+ *
+ *     The returned virtual address is a current CPU mapping for
+ *     the memory address given. It is only valid to use this function on
+ *     addresses that have a kernel mapping
+ *
+ *     This function does not handle bus mappings for DMA transfers. In
+ *     almost all conceivable cases a device driver should not be using
+ *     this function
  */
-#define MAP_NOCACHE	(0)
+static inline void *phys_to_virt(unsigned long address)
+{
+	return (void *)(address + PAGE_OFFSET - PHYS_OFFSET);
+}
+
+/*
+ * ISA I/O bus memory addresses are 1:1 with the physical address.
+ */
+static inline unsigned long isa_virt_to_bus(volatile void *address)
+{
+	return (unsigned long)address - PAGE_OFFSET;
+}
+
+static inline void *isa_bus_to_virt(unsigned long address)
+{
+	return (void *)(address + PAGE_OFFSET);
+}
+
+#define isa_page_to_bus page_to_phys
+
+/*
+ * However PCI ones are not necessarily 1:1 and therefore these interfaces
+ * are forbidden in portable PCI drivers.
+ *
+ * Allow them for x86 for legacy drivers, though.
+ */
+#define virt_to_bus virt_to_phys
+#define bus_to_virt phys_to_virt
+
+static inline void __iomem *__ioremap_mode(phys_addr_t offset, unsigned long size,
+	unsigned long flags)
+{
+	void __iomem *addr;
+	phys_addr_t phys_addr;
+
+	addr = plat_ioremap(offset, size, flags);
+	if (addr)
+		return addr;
+
+	phys_addr = fixup_bigphys_addr(offset, size);
+	return (void __iomem *)(unsigned long)CKSEG1ADDR(phys_addr);
+}
+
+/*
+ * ioremap     -   map bus memory into CPU space
+ * @offset:    bus address of the memory
+ * @size:      size of the resource to map
+ *
+ * ioremap performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ */
+#define ioremap(offset, size)						\
+	__ioremap_mode((offset), (size), _CACHE_UNCACHED)
+
+/*
+ * ioremap_nocache     -   map bus memory into CPU space
+ * @offset:    bus address of the memory
+ * @size:      size of the resource to map
+ *
+ * ioremap_nocache performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ *
+ * This version of ioremap ensures that the memory is marked uncachable
+ * on the CPU as well as honouring existing caching rules from things like
+ * the PCI bus. Note that there are other caches and buffers on many
+ * busses. In particular driver authors should read up on PCI writes
+ *
+ * It's useful if some control registers are in such an area and
+ * write combining or read caching is not desirable:
+ */
+#define ioremap_nocache(offset, size)					\
+	__ioremap_mode((offset), (size), _CACHE_UNCACHED)
+#define ioremap_uc ioremap_nocache
+
+/*
+ * ioremap_cachable -	map bus memory into CPU space
+ * @offset:	    bus address of the memory
+ * @size:	    size of the resource to map
+ *
+ * ioremap_nocache performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ *
+ * This version of ioremap ensures that the memory is marked cachable by
+ * the CPU.  Also enables full write-combining.	 Useful for some
+ * memory-like regions on I/O busses.
+ */
+#define ioremap_cachable(offset, size)					\
+	__ioremap_mode((offset), (size), _page_cachable_default)
+
+/*
+ * These two are MIPS specific ioremap variant.	 ioremap_cacheable_cow
+ * requests a cachable mapping, ioremap_uncached_accelerated requests a
+ * mapping using the uncached accelerated mode which isn't supported on
+ * all processors.
+ */
+#define ioremap_cacheable_cow(offset, size)				\
+	__ioremap_mode((offset), (size), _CACHE_CACHABLE_COW)
+#define ioremap_uncached_accelerated(offset, size)			\
+	__ioremap_mode((offset), (size), _CACHE_UNCACHED_ACCELERATED)
+
+static inline void iounmap(const volatile void __iomem *addr)
+{
+	plat_iounmap(addr);
+}
+
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+#define war_octeon_io_reorder_wmb()		wmb()
+#else
+#define war_octeon_io_reorder_wmb()		do { } while (0)
+#endif
+
+#define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, irq)			\
+									\
+static inline void pfx##write##bwlq(type val,				\
+				    volatile void __iomem *mem)		\
+{									\
+	volatile type *__mem;						\
+	type __val;							\
+									\
+	war_octeon_io_reorder_wmb();					\
+									\
+	__mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem));	\
+									\
+	__val = pfx##ioswab##bwlq(__mem, val);				\
+									\
+	if (sizeof(type) != sizeof(u64) || sizeof(u64) == sizeof(long)) \
+		*__mem = __val;						\
+	else if (cpu_has_64bits) {					\
+		type __tmp;						\
+									\
+		__asm__ __volatile__(					\
+			".set	arch=r4000"	"\t\t# __writeq""\n\t"	\
+			"dsll32 %L0, %L0, 0"			"\n\t"	\
+			"dsrl32 %L0, %L0, 0"			"\n\t"	\
+			"dsll32 %M0, %M0, 0"			"\n\t"	\
+			"or	%L0, %L0, %M0"			"\n\t"	\
+			"sd	%L0, %2"			"\n\t"	\
+			".set	mips0"				"\n"	\
+			: "=r" (__tmp)					\
+			: "0" (__val), "m" (*__mem));			\
+	} else								\
+		BUG();							\
+}									\
+									\
+static inline type pfx##read##bwlq(const volatile void __iomem *mem)	\
+{									\
+	volatile type *__mem;						\
+	type __val;							\
+									\
+	__mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem));	\
+									\
+	if (sizeof(type) != sizeof(u64) || sizeof(u64) == sizeof(long)) \
+		__val = *__mem;						\
+	else if (cpu_has_64bits) {					\
+		__asm__ __volatile__(					\
+			".set	arch=r4000"	"\t\t# __readq" "\n\t"	\
+			"ld	%L0, %1"			"\n\t"	\
+			"dsra32 %M0, %L0, 0"			"\n\t"	\
+			"sll	%L0, %L0, 0"			"\n\t"	\
+			".set	mips0"				"\n"	\
+			: "=r" (__val)					\
+			: "m" (*__mem));				\
+	} else {							\
+		__val = 0;						\
+		BUG();							\
+	}								\
+									\
+	return pfx##ioswab##bwlq(__mem, __val);				\
+}
+
+#define __BUILD_IOPORT_SINGLE(pfx, bwlq, type, p, slow)			\
+									\
+static inline void pfx##out##bwlq##p(type val, unsigned long port)	\
+{									\
+	volatile type *__addr;						\
+	type __val;							\
+									\
+	war_octeon_io_reorder_wmb();					\
+									\
+	__addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base + port); \
+									\
+	__val = pfx##ioswab##bwlq(__addr, val);				\
+									\
+	/* Really, we want this to be atomic */				\
+	BUILD_BUG_ON(sizeof(type) > sizeof(unsigned long));		\
+									\
+	*__addr = __val;						\
+	slow;								\
+}									\
+									\
+static inline type pfx##in##bwlq##p(unsigned long port)			\
+{									\
+	volatile type *__addr;						\
+	type __val;							\
+									\
+	__addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base + port); \
+									\
+	BUILD_BUG_ON(sizeof(type) > sizeof(unsigned long));		\
+									\
+	__val = *__addr;						\
+	slow;								\
+									\
+	return pfx##ioswab##bwlq(__addr, __val);			\
+}
+
+#define __BUILD_MEMORY_PFX(bus, bwlq, type)				\
+									\
+__BUILD_MEMORY_SINGLE(bus, bwlq, type, 1)
+
+#define BUILDIO_MEM(bwlq, type)						\
+									\
+__BUILD_MEMORY_PFX(__raw_, bwlq, type)					\
+__BUILD_MEMORY_PFX(, bwlq, type)					\
+__BUILD_MEMORY_PFX(__mem_, bwlq, type)					\
+
+BUILDIO_MEM(b, u8)
+BUILDIO_MEM(w, u16)
+BUILDIO_MEM(l, u32)
+BUILDIO_MEM(q, u64)
+
+#define __BUILD_IOPORT_PFX(bus, bwlq, type)				\
+	__BUILD_IOPORT_SINGLE(bus, bwlq, type, ,)			\
+	__BUILD_IOPORT_SINGLE(bus, bwlq, type, _p, SLOW_DOWN_IO)
+
+#define BUILDIO_IOPORT(bwlq, type)					\
+	__BUILD_IOPORT_PFX(, bwlq, type)				\
+	__BUILD_IOPORT_PFX(__mem_, bwlq, type)
+
+BUILDIO_IOPORT(b, u8)
+BUILDIO_IOPORT(w, u16)
+BUILDIO_IOPORT(l, u32)
+#ifdef CONFIG_64BIT
+BUILDIO_IOPORT(q, u64)
+#endif
+
+#define __BUILDIO(bwlq, type)						\
+									\
+__BUILD_MEMORY_SINGLE(____raw_, bwlq, type, 0)
+
+__BUILDIO(q, u64)
+
+#define readb_relaxed			readb
+#define readw_relaxed			readw
+#define readl_relaxed			readl
+#define readq_relaxed			readq
+
+#define writeb_relaxed			writeb
+#define writew_relaxed			writew
+#define writel_relaxed			writel
+#define writeq_relaxed			writeq
+
+#define readb_be(addr)							\
+	__raw_readb((__force unsigned *)(addr))
+#define readw_be(addr)							\
+	be16_to_cpu(__raw_readw((__force unsigned *)(addr)))
+#define readl_be(addr)							\
+	be32_to_cpu(__raw_readl((__force unsigned *)(addr)))
+#define readq_be(addr)							\
+	be64_to_cpu(__raw_readq((__force unsigned *)(addr)))
+
+#define writeb_be(val, addr)						\
+	__raw_writeb((val), (__force unsigned *)(addr))
+#define writew_be(val, addr)						\
+	__raw_writew(cpu_to_be16((val)), (__force unsigned *)(addr))
+#define writel_be(val, addr)						\
+	__raw_writel(cpu_to_be32((val)), (__force unsigned *)(addr))
+#define writeq_be(val, addr)						\
+	__raw_writeq(cpu_to_be64((val)), (__force unsigned *)(addr))
+
+/*
+ * Some code tests for these symbols
+ */
+#define readq				readq
+#define writeq				writeq
+
+#define __BUILD_MEMORY_STRING(bwlq, type)				\
+									\
+static inline void writes##bwlq(volatile void __iomem *mem,		\
+				const void *addr, unsigned int count)	\
+{									\
+	const volatile type *__addr = addr;				\
+									\
+	while (count--) {						\
+		__mem_write##bwlq(*__addr, mem);			\
+		__addr++;						\
+	}								\
+}									\
+									\
+static inline void reads##bwlq(volatile void __iomem *mem, void *addr,	\
+			       unsigned int count)			\
+{									\
+	volatile type *__addr = addr;					\
+									\
+	while (count--) {						\
+		*__addr = __mem_read##bwlq(mem);			\
+		__addr++;						\
+	}								\
+}
+
+#define __BUILD_IOPORT_STRING(bwlq, type)				\
+									\
+static inline void outs##bwlq(unsigned long port, const void *addr,	\
+			      unsigned int count)			\
+{									\
+	const volatile type *__addr = addr;				\
+									\
+	while (count--) {						\
+		__mem_out##bwlq(*__addr, port);				\
+		__addr++;						\
+	}								\
+}									\
+									\
+static inline void ins##bwlq(unsigned long port, void *addr,		\
+			     unsigned int count)			\
+{									\
+	volatile type *__addr = addr;					\
+									\
+	while (count--) {						\
+		*__addr = __mem_in##bwlq(port);				\
+		__addr++;						\
+	}								\
+}
+
+#define BUILDSTRING(bwlq, type)						\
+									\
+__BUILD_MEMORY_STRING(bwlq, type)					\
+__BUILD_IOPORT_STRING(bwlq, type)
+
+BUILDSTRING(b, u8)
+BUILDSTRING(w, u16)
+BUILDSTRING(l, u32)
+#ifdef CONFIG_64BIT
+BUILDSTRING(q, u64)
+#endif
+
+
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+#define mmiowb() wmb()
+#else
+/* Depends on MIPS II instruction set */
+#define mmiowb() asm volatile ("sync" ::: "memory")
+#endif
+
+static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
+{
+	memset((void __force *)addr, val, count);
+}
+static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
+{
+	memcpy(dst, (void __force *)src, count);
+}
+static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
+{
+	memcpy((void __force *)dst, src, count);
+}
+
+/*
+ * Read a 32-bit register that requires a 64-bit read cycle on the bus.
+ * Avoid interrupt mucking, just adjust the address for 4-byte access.
+ * Assume the addresses are 8-byte aligned.
+ */
+#ifdef __MIPSEB__
+#define __CSR_32_ADJUST 4
+#else
+#define __CSR_32_ADJUST 0
+#endif
+
+#define csr_out32(v, a) (*(volatile u32 *)((unsigned long)(a) + __CSR_32_ADJUST) = (v))
+#define csr_in32(a)    (*(volatile u32 *)((unsigned long)(a) + __CSR_32_ADJUST))
+
+/*
+ * U-Boot specific
+ */
+#define sync()		mmiowb()
+
+#define MAP_NOCACHE	(1)
 #define MAP_WRCOMBINE	(0)
 #define MAP_WRBACK	(0)
 #define MAP_WRTHROUGH	(0)
@@ -493,6 +528,9 @@
 static inline void *
 map_physmem(phys_addr_t paddr, unsigned long len, unsigned long flags)
 {
+	if (flags == MAP_NOCACHE)
+		return ioremap(paddr, len);
+
 	return (void *)paddr;
 }
 
@@ -501,7 +539,6 @@
  */
 static inline void unmap_physmem(void *vaddr, unsigned long flags)
 {
-
 }
 
 #endif /* _ASM_IO_H */