From 54f2c841fa0007e5fee3b7d01a911c774f0a6cda Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 7 May 2009 17:28:59 +0200 Subject: [PATCH 01/91] oprofile: fix cpu buffer size The unit of oprofile_cpu_buffer_size is in samples, but was allocated in bytes. This led to the allocation of too small cpu buffers. This patch recalculates the buffer size in bytes taking also the ring_buffer_event header size into account. Reported-by: Suravee Suthikulpanit Signed-off-by: Robert Richter --- drivers/oprofile/cpu_buffer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index f0e99d4c066b..242257b19441 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -78,16 +78,20 @@ void free_cpu_buffers(void) op_ring_buffer_write = NULL; } +#define RB_EVENT_HDR_SIZE 4 + int alloc_cpu_buffers(void) { int i; unsigned long buffer_size = oprofile_cpu_buffer_size; + unsigned long byte_size = buffer_size * (sizeof(struct op_sample) + + RB_EVENT_HDR_SIZE); - op_ring_buffer_read = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS); + op_ring_buffer_read = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS); if (!op_ring_buffer_read) goto fail; - op_ring_buffer_write = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS); + op_ring_buffer_write = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS); if (!op_ring_buffer_write) goto fail; From 5df6d737dd4b0fe9eccf943abb3677cfea05a6c4 Mon Sep 17 00:00:00 2001 From: Abhijeet Joglekar Date: Fri, 17 Apr 2009 18:33:26 -0700 Subject: [PATCH 02/91] [SCSI] fnic: Add new Cisco PCI-Express FCoE HBA fnic is a driver for the Cisco PCI-Express FCoE HBA Signed-off-by: Abhijeet Joglekar Signed-off-by: Joe Eykholt Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- MAINTAINERS | 8 + drivers/scsi/Kconfig | 11 + drivers/scsi/Makefile | 1 + drivers/scsi/fnic/Makefile | 15 + drivers/scsi/fnic/cq_desc.h | 78 ++ drivers/scsi/fnic/cq_enet_desc.h | 167 +++ drivers/scsi/fnic/cq_exch_desc.h | 182 +++ drivers/scsi/fnic/fcpio.h | 780 ++++++++++++ drivers/scsi/fnic/fnic.h | 265 +++++ drivers/scsi/fnic/fnic_attrs.c | 56 + drivers/scsi/fnic/fnic_fcs.c | 742 ++++++++++++ drivers/scsi/fnic/fnic_io.h | 67 ++ drivers/scsi/fnic/fnic_isr.c | 332 ++++++ drivers/scsi/fnic/fnic_main.c | 942 +++++++++++++++ drivers/scsi/fnic/fnic_res.c | 444 +++++++ drivers/scsi/fnic/fnic_res.h | 197 +++ drivers/scsi/fnic/fnic_scsi.c | 1850 +++++++++++++++++++++++++++++ drivers/scsi/fnic/rq_enet_desc.h | 58 + drivers/scsi/fnic/vnic_cq.c | 85 ++ drivers/scsi/fnic/vnic_cq.h | 121 ++ drivers/scsi/fnic/vnic_cq_copy.h | 62 + drivers/scsi/fnic/vnic_dev.c | 690 +++++++++++ drivers/scsi/fnic/vnic_dev.h | 161 +++ drivers/scsi/fnic/vnic_devcmd.h | 281 +++++ drivers/scsi/fnic/vnic_intr.c | 60 + drivers/scsi/fnic/vnic_intr.h | 118 ++ drivers/scsi/fnic/vnic_nic.h | 69 ++ drivers/scsi/fnic/vnic_resource.h | 61 + drivers/scsi/fnic/vnic_rq.c | 196 +++ drivers/scsi/fnic/vnic_rq.h | 235 ++++ drivers/scsi/fnic/vnic_scsi.h | 99 ++ drivers/scsi/fnic/vnic_stats.h | 68 ++ drivers/scsi/fnic/vnic_wq.c | 182 +++ drivers/scsi/fnic/vnic_wq.h | 175 +++ drivers/scsi/fnic/vnic_wq_copy.c | 117 ++ drivers/scsi/fnic/vnic_wq_copy.h | 128 ++ drivers/scsi/fnic/wq_enet_desc.h | 96 ++ 37 files changed, 9199 insertions(+) create mode 100644 drivers/scsi/fnic/Makefile create mode 100644 drivers/scsi/fnic/cq_desc.h create mode 100644 drivers/scsi/fnic/cq_enet_desc.h create mode 100644 drivers/scsi/fnic/cq_exch_desc.h create mode 100644 drivers/scsi/fnic/fcpio.h create mode 100644 drivers/scsi/fnic/fnic.h create mode 100644 drivers/scsi/fnic/fnic_attrs.c create mode 100644 drivers/scsi/fnic/fnic_fcs.c create mode 100644 drivers/scsi/fnic/fnic_io.h create mode 100644 drivers/scsi/fnic/fnic_isr.c create mode 100644 drivers/scsi/fnic/fnic_main.c create mode 100644 drivers/scsi/fnic/fnic_res.c create mode 100644 drivers/scsi/fnic/fnic_res.h create mode 100644 drivers/scsi/fnic/fnic_scsi.c create mode 100644 drivers/scsi/fnic/rq_enet_desc.h create mode 100644 drivers/scsi/fnic/vnic_cq.c create mode 100644 drivers/scsi/fnic/vnic_cq.h create mode 100644 drivers/scsi/fnic/vnic_cq_copy.h create mode 100644 drivers/scsi/fnic/vnic_dev.c create mode 100644 drivers/scsi/fnic/vnic_dev.h create mode 100644 drivers/scsi/fnic/vnic_devcmd.h create mode 100644 drivers/scsi/fnic/vnic_intr.c create mode 100644 drivers/scsi/fnic/vnic_intr.h create mode 100644 drivers/scsi/fnic/vnic_nic.h create mode 100644 drivers/scsi/fnic/vnic_resource.h create mode 100644 drivers/scsi/fnic/vnic_rq.c create mode 100644 drivers/scsi/fnic/vnic_rq.h create mode 100644 drivers/scsi/fnic/vnic_scsi.h create mode 100644 drivers/scsi/fnic/vnic_stats.h create mode 100644 drivers/scsi/fnic/vnic_wq.c create mode 100644 drivers/scsi/fnic/vnic_wq.h create mode 100644 drivers/scsi/fnic/vnic_wq_copy.c create mode 100644 drivers/scsi/fnic/vnic_wq_copy.h create mode 100644 drivers/scsi/fnic/wq_enet_desc.h diff --git a/MAINTAINERS b/MAINTAINERS index 2b349ba4add4..c7bed166ad6f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1431,6 +1431,14 @@ P: Russell King M: linux@arm.linux.org.uk F: include/linux/clk.h +CISCO FCOE HBA DRIVER +P: Abhijeet Joglekar +M: abjoglek@cisco.com +P: Joe Eykholt +M: jeykholt@cisco.com +L: linux-scsi@vger.kernel.org +S: Supported + CODA FILE SYSTEM P: Jan Harkes M: jaharkes@cs.cmu.edu diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 8ed2990c826e..fb2740789b68 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -628,6 +628,17 @@ config FCOE ---help--- Fibre Channel over Ethernet module +config FCOE_FNIC + tristate "Cisco FNIC Driver" + depends on PCI && X86 + select LIBFC + help + This is support for the Cisco PCI-Express FCoE HBA. + + To compile this driver as a module, choose M here and read + . + The module will be called fnic. + config SCSI_DMX3191D tristate "DMX3191D SCSI support" depends on PCI && SCSI diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index e7c861ac417d..a5049cfb40ed 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_SCSI_DH) += device_handler/ obj-$(CONFIG_LIBFC) += libfc/ obj-$(CONFIG_LIBFCOE) += fcoe/ obj-$(CONFIG_FCOE) += fcoe/ +obj-$(CONFIG_FCOE_FNIC) += fnic/ obj-$(CONFIG_ISCSI_TCP) += libiscsi.o libiscsi_tcp.o iscsi_tcp.o obj-$(CONFIG_INFINIBAND_ISER) += libiscsi.o obj-$(CONFIG_SCSI_A4000T) += 53c700.o a4000t.o diff --git a/drivers/scsi/fnic/Makefile b/drivers/scsi/fnic/Makefile new file mode 100644 index 000000000000..37c3440bc17c --- /dev/null +++ b/drivers/scsi/fnic/Makefile @@ -0,0 +1,15 @@ +obj-$(CONFIG_FCOE_FNIC) += fnic.o + +fnic-y := \ + fnic_attrs.o \ + fnic_isr.o \ + fnic_main.o \ + fnic_res.o \ + fnic_fcs.o \ + fnic_scsi.o \ + vnic_cq.o \ + vnic_dev.o \ + vnic_intr.o \ + vnic_rq.o \ + vnic_wq_copy.o \ + vnic_wq.o diff --git a/drivers/scsi/fnic/cq_desc.h b/drivers/scsi/fnic/cq_desc.h new file mode 100644 index 000000000000..d1225cf6320e --- /dev/null +++ b/drivers/scsi/fnic/cq_desc.h @@ -0,0 +1,78 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _CQ_DESC_H_ +#define _CQ_DESC_H_ + +/* + * Completion queue descriptor types + */ +enum cq_desc_types { + CQ_DESC_TYPE_WQ_ENET = 0, + CQ_DESC_TYPE_DESC_COPY = 1, + CQ_DESC_TYPE_WQ_EXCH = 2, + CQ_DESC_TYPE_RQ_ENET = 3, + CQ_DESC_TYPE_RQ_FCP = 4, +}; + +/* Completion queue descriptor: 16B + * + * All completion queues have this basic layout. The + * type_specfic area is unique for each completion + * queue type. + */ +struct cq_desc { + __le16 completed_index; + __le16 q_number; + u8 type_specfic[11]; + u8 type_color; +}; + +#define CQ_DESC_TYPE_BITS 4 +#define CQ_DESC_TYPE_MASK ((1 << CQ_DESC_TYPE_BITS) - 1) +#define CQ_DESC_COLOR_MASK 1 +#define CQ_DESC_COLOR_SHIFT 7 +#define CQ_DESC_Q_NUM_BITS 10 +#define CQ_DESC_Q_NUM_MASK ((1 << CQ_DESC_Q_NUM_BITS) - 1) +#define CQ_DESC_COMP_NDX_BITS 12 +#define CQ_DESC_COMP_NDX_MASK ((1 << CQ_DESC_COMP_NDX_BITS) - 1) + +static inline void cq_desc_dec(const struct cq_desc *desc_arg, + u8 *type, u8 *color, u16 *q_number, u16 *completed_index) +{ + const struct cq_desc *desc = desc_arg; + const u8 type_color = desc->type_color; + + *color = (type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK; + + /* + * Make sure color bit is read from desc *before* other fields + * are read from desc. Hardware guarantees color bit is last + * bit (byte) written. Adding the rmb() prevents the compiler + * and/or CPU from reordering the reads which would potentially + * result in reading stale values. + */ + + rmb(); + + *type = type_color & CQ_DESC_TYPE_MASK; + *q_number = le16_to_cpu(desc->q_number) & CQ_DESC_Q_NUM_MASK; + *completed_index = le16_to_cpu(desc->completed_index) & + CQ_DESC_COMP_NDX_MASK; +} + +#endif /* _CQ_DESC_H_ */ diff --git a/drivers/scsi/fnic/cq_enet_desc.h b/drivers/scsi/fnic/cq_enet_desc.h new file mode 100644 index 000000000000..a9fa26f82ddd --- /dev/null +++ b/drivers/scsi/fnic/cq_enet_desc.h @@ -0,0 +1,167 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _CQ_ENET_DESC_H_ +#define _CQ_ENET_DESC_H_ + +#include "cq_desc.h" + +/* Ethernet completion queue descriptor: 16B */ +struct cq_enet_wq_desc { + __le16 completed_index; + __le16 q_number; + u8 reserved[11]; + u8 type_color; +}; + +static inline void cq_enet_wq_desc_dec(struct cq_enet_wq_desc *desc, + u8 *type, u8 *color, u16 *q_number, u16 *completed_index) +{ + cq_desc_dec((struct cq_desc *)desc, type, + color, q_number, completed_index); +} + +/* Completion queue descriptor: Ethernet receive queue, 16B */ +struct cq_enet_rq_desc { + __le16 completed_index_flags; + __le16 q_number_rss_type_flags; + __le32 rss_hash; + __le16 bytes_written_flags; + __le16 vlan; + __le16 checksum_fcoe; + u8 flags; + u8 type_color; +}; + +#define CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT (0x1 << 12) +#define CQ_ENET_RQ_DESC_FLAGS_FCOE (0x1 << 13) +#define CQ_ENET_RQ_DESC_FLAGS_EOP (0x1 << 14) +#define CQ_ENET_RQ_DESC_FLAGS_SOP (0x1 << 15) + +#define CQ_ENET_RQ_DESC_RSS_TYPE_BITS 4 +#define CQ_ENET_RQ_DESC_RSS_TYPE_MASK \ + ((1 << CQ_ENET_RQ_DESC_RSS_TYPE_BITS) - 1) +#define CQ_ENET_RQ_DESC_RSS_TYPE_NONE 0 +#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv4 1 +#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4 2 +#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv6 3 +#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6 4 +#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX 5 +#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX 6 + +#define CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC (0x1 << 14) + +#define CQ_ENET_RQ_DESC_BYTES_WRITTEN_BITS 14 +#define CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK \ + ((1 << CQ_ENET_RQ_DESC_BYTES_WRITTEN_BITS) - 1) +#define CQ_ENET_RQ_DESC_FLAGS_TRUNCATED (0x1 << 14) +#define CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED (0x1 << 15) + +#define CQ_ENET_RQ_DESC_FCOE_SOF_BITS 4 +#define CQ_ENET_RQ_DESC_FCOE_SOF_MASK \ + ((1 << CQ_ENET_RQ_DESC_FCOE_SOF_BITS) - 1) +#define CQ_ENET_RQ_DESC_FCOE_EOF_BITS 8 +#define CQ_ENET_RQ_DESC_FCOE_EOF_MASK \ + ((1 << CQ_ENET_RQ_DESC_FCOE_EOF_BITS) - 1) +#define CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT 8 + +#define CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK (0x1 << 0) +#define CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK (0x1 << 0) +#define CQ_ENET_RQ_DESC_FLAGS_UDP (0x1 << 1) +#define CQ_ENET_RQ_DESC_FCOE_ENC_ERROR (0x1 << 1) +#define CQ_ENET_RQ_DESC_FLAGS_TCP (0x1 << 2) +#define CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK (0x1 << 3) +#define CQ_ENET_RQ_DESC_FLAGS_IPV6 (0x1 << 4) +#define CQ_ENET_RQ_DESC_FLAGS_IPV4 (0x1 << 5) +#define CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT (0x1 << 6) +#define CQ_ENET_RQ_DESC_FLAGS_FCS_OK (0x1 << 7) + +static inline void cq_enet_rq_desc_dec(struct cq_enet_rq_desc *desc, + u8 *type, u8 *color, u16 *q_number, u16 *completed_index, + u8 *ingress_port, u8 *fcoe, u8 *eop, u8 *sop, u8 *rss_type, + u8 *csum_not_calc, u32 *rss_hash, u16 *bytes_written, u8 *packet_error, + u8 *vlan_stripped, u16 *vlan, u16 *checksum, u8 *fcoe_sof, + u8 *fcoe_fc_crc_ok, u8 *fcoe_enc_error, u8 *fcoe_eof, + u8 *tcp_udp_csum_ok, u8 *udp, u8 *tcp, u8 *ipv4_csum_ok, + u8 *ipv6, u8 *ipv4, u8 *ipv4_fragment, u8 *fcs_ok) +{ + u16 completed_index_flags = le16_to_cpu(desc->completed_index_flags); + u16 q_number_rss_type_flags = + le16_to_cpu(desc->q_number_rss_type_flags); + u16 bytes_written_flags = le16_to_cpu(desc->bytes_written_flags); + + cq_desc_dec((struct cq_desc *)desc, type, + color, q_number, completed_index); + + *ingress_port = (completed_index_flags & + CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT) ? 1 : 0; + *fcoe = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_FCOE) ? + 1 : 0; + *eop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_EOP) ? + 1 : 0; + *sop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_SOP) ? + 1 : 0; + + *rss_type = (u8)((q_number_rss_type_flags >> CQ_DESC_Q_NUM_BITS) & + CQ_ENET_RQ_DESC_RSS_TYPE_MASK); + *csum_not_calc = (q_number_rss_type_flags & + CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ? 1 : 0; + + *rss_hash = le32_to_cpu(desc->rss_hash); + + *bytes_written = bytes_written_flags & + CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; + *packet_error = (bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ? 1 : 0; + *vlan_stripped = (bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) ? 1 : 0; + + *vlan = le16_to_cpu(desc->vlan); + + if (*fcoe) { + *fcoe_sof = (u8)(le16_to_cpu(desc->checksum_fcoe) & + CQ_ENET_RQ_DESC_FCOE_SOF_MASK); + *fcoe_fc_crc_ok = (desc->flags & + CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK) ? 1 : 0; + *fcoe_enc_error = (desc->flags & + CQ_ENET_RQ_DESC_FCOE_ENC_ERROR) ? 1 : 0; + *fcoe_eof = (u8)((desc->checksum_fcoe >> + CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT) & + CQ_ENET_RQ_DESC_FCOE_EOF_MASK); + *checksum = 0; + } else { + *fcoe_sof = 0; + *fcoe_fc_crc_ok = 0; + *fcoe_enc_error = 0; + *fcoe_eof = 0; + *checksum = le16_to_cpu(desc->checksum_fcoe); + } + + *tcp_udp_csum_ok = + (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ? 1 : 0; + *udp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_UDP) ? 1 : 0; + *tcp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP) ? 1 : 0; + *ipv4_csum_ok = + (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ? 1 : 0; + *ipv6 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV6) ? 1 : 0; + *ipv4 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4) ? 1 : 0; + *ipv4_fragment = + (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT) ? 1 : 0; + *fcs_ok = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK) ? 1 : 0; +} + +#endif /* _CQ_ENET_DESC_H_ */ diff --git a/drivers/scsi/fnic/cq_exch_desc.h b/drivers/scsi/fnic/cq_exch_desc.h new file mode 100644 index 000000000000..501660cfe228 --- /dev/null +++ b/drivers/scsi/fnic/cq_exch_desc.h @@ -0,0 +1,182 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _CQ_EXCH_DESC_H_ +#define _CQ_EXCH_DESC_H_ + +#include "cq_desc.h" + +/* Exchange completion queue descriptor: 16B */ +struct cq_exch_wq_desc { + u16 completed_index; + u16 q_number; + u16 exchange_id; + u8 tmpl; + u8 reserved0; + u32 reserved1; + u8 exch_status; + u8 reserved2[2]; + u8 type_color; +}; + +#define CQ_EXCH_WQ_STATUS_BITS 2 +#define CQ_EXCH_WQ_STATUS_MASK ((1 << CQ_EXCH_WQ_STATUS_BITS) - 1) + +enum cq_exch_status_types { + CQ_EXCH_WQ_STATUS_TYPE_COMPLETE = 0, + CQ_EXCH_WQ_STATUS_TYPE_ABORT = 1, + CQ_EXCH_WQ_STATUS_TYPE_SGL_EOF = 2, + CQ_EXCH_WQ_STATUS_TYPE_TMPL_ERR = 3, +}; + +static inline void cq_exch_wq_desc_dec(struct cq_exch_wq_desc *desc_ptr, + u8 *type, + u8 *color, + u16 *q_number, + u16 *completed_index, + u8 *exch_status) +{ + cq_desc_dec((struct cq_desc *)desc_ptr, type, + color, q_number, completed_index); + *exch_status = desc_ptr->exch_status & CQ_EXCH_WQ_STATUS_MASK; +} + +struct cq_fcp_rq_desc { + u16 completed_index_eop_sop_prt; + u16 q_number; + u16 exchange_id; + u16 tmpl; + u16 bytes_written; + u16 vlan; + u8 sof; + u8 eof; + u8 fcs_fer_fck; + u8 type_color; +}; + +#define CQ_FCP_RQ_DESC_FLAGS_SOP (1 << 15) +#define CQ_FCP_RQ_DESC_FLAGS_EOP (1 << 14) +#define CQ_FCP_RQ_DESC_FLAGS_PRT (1 << 12) +#define CQ_FCP_RQ_DESC_TMPL_MASK 0x1f +#define CQ_FCP_RQ_DESC_BYTES_WRITTEN_MASK 0x3fff +#define CQ_FCP_RQ_DESC_PACKET_ERR_SHIFT 14 +#define CQ_FCP_RQ_DESC_PACKET_ERR_MASK (1 << CQ_FCP_RQ_DESC_PACKET_ERR_SHIFT) +#define CQ_FCP_RQ_DESC_VS_STRIPPED_SHIFT 15 +#define CQ_FCP_RQ_DESC_VS_STRIPPED_MASK (1 << CQ_FCP_RQ_DESC_VS_STRIPPED_SHIFT) +#define CQ_FCP_RQ_DESC_FC_CRC_OK_MASK 0x1 +#define CQ_FCP_RQ_DESC_FCOE_ERR_SHIFT 1 +#define CQ_FCP_RQ_DESC_FCOE_ERR_MASK (1 << CQ_FCP_RQ_DESC_FCOE_ERR_SHIFT) +#define CQ_FCP_RQ_DESC_FCS_OK_SHIFT 7 +#define CQ_FCP_RQ_DESC_FCS_OK_MASK (1 << CQ_FCP_RQ_DESC_FCS_OK_SHIFT) + +static inline void cq_fcp_rq_desc_dec(struct cq_fcp_rq_desc *desc_ptr, + u8 *type, + u8 *color, + u16 *q_number, + u16 *completed_index, + u8 *eop, + u8 *sop, + u8 *fck, + u16 *exchange_id, + u16 *tmpl, + u32 *bytes_written, + u8 *sof, + u8 *eof, + u8 *ingress_port, + u8 *packet_err, + u8 *fcoe_err, + u8 *fcs_ok, + u8 *vlan_stripped, + u16 *vlan) +{ + cq_desc_dec((struct cq_desc *)desc_ptr, type, + color, q_number, completed_index); + *eop = (desc_ptr->completed_index_eop_sop_prt & + CQ_FCP_RQ_DESC_FLAGS_EOP) ? 1 : 0; + *sop = (desc_ptr->completed_index_eop_sop_prt & + CQ_FCP_RQ_DESC_FLAGS_SOP) ? 1 : 0; + *ingress_port = + (desc_ptr->completed_index_eop_sop_prt & + CQ_FCP_RQ_DESC_FLAGS_PRT) ? 1 : 0; + *exchange_id = desc_ptr->exchange_id; + *tmpl = desc_ptr->tmpl & CQ_FCP_RQ_DESC_TMPL_MASK; + *bytes_written = + desc_ptr->bytes_written & CQ_FCP_RQ_DESC_BYTES_WRITTEN_MASK; + *packet_err = + (desc_ptr->bytes_written & CQ_FCP_RQ_DESC_PACKET_ERR_MASK) >> + CQ_FCP_RQ_DESC_PACKET_ERR_SHIFT; + *vlan_stripped = + (desc_ptr->bytes_written & CQ_FCP_RQ_DESC_VS_STRIPPED_MASK) >> + CQ_FCP_RQ_DESC_VS_STRIPPED_SHIFT; + *vlan = desc_ptr->vlan; + *sof = desc_ptr->sof; + *fck = desc_ptr->fcs_fer_fck & CQ_FCP_RQ_DESC_FC_CRC_OK_MASK; + *fcoe_err = (desc_ptr->fcs_fer_fck & CQ_FCP_RQ_DESC_FCOE_ERR_MASK) >> + CQ_FCP_RQ_DESC_FCOE_ERR_SHIFT; + *eof = desc_ptr->eof; + *fcs_ok = + (desc_ptr->fcs_fer_fck & CQ_FCP_RQ_DESC_FCS_OK_MASK) >> + CQ_FCP_RQ_DESC_FCS_OK_SHIFT; +} + +struct cq_sgl_desc { + u16 exchange_id; + u16 q_number; + u32 active_burst_offset; + u32 tot_data_bytes; + u16 tmpl; + u8 sgl_err; + u8 type_color; +}; + +enum cq_sgl_err_types { + CQ_SGL_ERR_NO_ERROR = 0, + CQ_SGL_ERR_OVERFLOW, /* data ran beyond end of SGL */ + CQ_SGL_ERR_SGL_LCL_ADDR_ERR, /* sgl access to local vnic addr illegal*/ + CQ_SGL_ERR_ADDR_RSP_ERR, /* sgl address error */ + CQ_SGL_ERR_DATA_RSP_ERR, /* sgl data rsp error */ + CQ_SGL_ERR_CNT_ZERO_ERR, /* SGL count is 0 */ + CQ_SGL_ERR_CNT_MAX_ERR, /* SGL count is larger than supported */ + CQ_SGL_ERR_ORDER_ERR, /* frames recv on both ports, order err */ + CQ_SGL_ERR_DATA_LCL_ADDR_ERR,/* sgl data buf to local vnic addr ill */ + CQ_SGL_ERR_HOST_CQ_ERR, /* host cq entry to local vnic addr ill */ +}; + +#define CQ_SGL_SGL_ERR_MASK 0x1f +#define CQ_SGL_TMPL_MASK 0x1f + +static inline void cq_sgl_desc_dec(struct cq_sgl_desc *desc_ptr, + u8 *type, + u8 *color, + u16 *q_number, + u16 *exchange_id, + u32 *active_burst_offset, + u32 *tot_data_bytes, + u16 *tmpl, + u8 *sgl_err) +{ + /* Cheat a little by assuming exchange_id is the same as completed + index */ + cq_desc_dec((struct cq_desc *)desc_ptr, type, color, q_number, + exchange_id); + *active_burst_offset = desc_ptr->active_burst_offset; + *tot_data_bytes = desc_ptr->tot_data_bytes; + *tmpl = desc_ptr->tmpl & CQ_SGL_TMPL_MASK; + *sgl_err = desc_ptr->sgl_err & CQ_SGL_SGL_ERR_MASK; +} + +#endif /* _CQ_EXCH_DESC_H_ */ diff --git a/drivers/scsi/fnic/fcpio.h b/drivers/scsi/fnic/fcpio.h new file mode 100644 index 000000000000..12d770d885c5 --- /dev/null +++ b/drivers/scsi/fnic/fcpio.h @@ -0,0 +1,780 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _FCPIO_H_ +#define _FCPIO_H_ + +#include + +/* + * This header file includes all of the data structures used for + * communication by the host driver to the fcp firmware. + */ + +/* + * Exchange and sequence id space allocated to the host driver + */ +#define FCPIO_HOST_EXCH_RANGE_START 0x1000 +#define FCPIO_HOST_EXCH_RANGE_END 0x1fff +#define FCPIO_HOST_SEQ_ID_RANGE_START 0x80 +#define FCPIO_HOST_SEQ_ID_RANGE_END 0xff + +/* + * Command entry type + */ +enum fcpio_type { + /* + * Initiator request types + */ + FCPIO_ICMND_16 = 0x1, + FCPIO_ICMND_32, + FCPIO_ICMND_CMPL, + FCPIO_ITMF, + FCPIO_ITMF_CMPL, + + /* + * Target request types + */ + FCPIO_TCMND_16 = 0x11, + FCPIO_TCMND_32, + FCPIO_TDATA, + FCPIO_TXRDY, + FCPIO_TRSP, + FCPIO_TDRSP_CMPL, + FCPIO_TTMF, + FCPIO_TTMF_ACK, + FCPIO_TABORT, + FCPIO_TABORT_CMPL, + + /* + * Misc request types + */ + FCPIO_ACK = 0x20, + FCPIO_RESET, + FCPIO_RESET_CMPL, + FCPIO_FLOGI_REG, + FCPIO_FLOGI_REG_CMPL, + FCPIO_ECHO, + FCPIO_ECHO_CMPL, + FCPIO_LUNMAP_CHNG, + FCPIO_LUNMAP_REQ, + FCPIO_LUNMAP_REQ_CMPL, + FCPIO_FLOGI_FIP_REG, + FCPIO_FLOGI_FIP_REG_CMPL, +}; + +/* + * Header status codes from the firmware + */ +enum fcpio_status { + FCPIO_SUCCESS = 0, /* request was successful */ + + /* + * If a request to the firmware is rejected, the original request + * header will be returned with the status set to one of the following: + */ + FCPIO_INVALID_HEADER, /* header contains invalid data */ + FCPIO_OUT_OF_RESOURCE, /* out of resources to complete request */ + FCPIO_INVALID_PARAM, /* some parameter in request is invalid */ + FCPIO_REQ_NOT_SUPPORTED, /* request type is not supported */ + FCPIO_IO_NOT_FOUND, /* requested I/O was not found */ + + /* + * Once a request is processed, the firmware will usually return + * a cmpl message type. In cases where errors occurred, + * the header status field would be filled in with one of the following: + */ + FCPIO_ABORTED = 0x41, /* request was aborted */ + FCPIO_TIMEOUT, /* request was timed out */ + FCPIO_SGL_INVALID, /* request was aborted due to sgl error */ + FCPIO_MSS_INVALID, /* request was aborted due to mss error */ + FCPIO_DATA_CNT_MISMATCH, /* recv/sent more/less data than exp. */ + FCPIO_FW_ERR, /* request was terminated due to fw error */ + FCPIO_ITMF_REJECTED, /* itmf req was rejected by remote node */ + FCPIO_ITMF_FAILED, /* itmf req was failed by remote node */ + FCPIO_ITMF_INCORRECT_LUN, /* itmf req targeted incorrect LUN */ + FCPIO_CMND_REJECTED, /* request was invalid and rejected */ + FCPIO_NO_PATH_AVAIL, /* no paths to the lun was available */ + FCPIO_PATH_FAILED, /* i/o sent to current path failed */ + FCPIO_LUNMAP_CHNG_PEND, /* i/o rejected due to lunmap change */ +}; + +/* + * The header command tag. All host requests will use the "tag" field + * to mark commands with a unique tag. When the firmware responds to + * a host request, it will copy the tag field into the response. + * + * The only firmware requests that will use the rx_id/ox_id fields instead + * of the tag field will be the target command and target task management + * requests. These two requests do not have corresponding host requests + * since they come directly from the FC initiator on the network. + */ +struct fcpio_tag { + union { + u32 req_id; + struct { + u16 rx_id; + u16 ox_id; + } ex_id; + } u; +}; + +static inline void +fcpio_tag_id_enc(struct fcpio_tag *tag, u32 id) +{ + tag->u.req_id = id; +} + +static inline void +fcpio_tag_id_dec(struct fcpio_tag *tag, u32 *id) +{ + *id = tag->u.req_id; +} + +static inline void +fcpio_tag_exid_enc(struct fcpio_tag *tag, u16 ox_id, u16 rx_id) +{ + tag->u.ex_id.rx_id = rx_id; + tag->u.ex_id.ox_id = ox_id; +} + +static inline void +fcpio_tag_exid_dec(struct fcpio_tag *tag, u16 *ox_id, u16 *rx_id) +{ + *rx_id = tag->u.ex_id.rx_id; + *ox_id = tag->u.ex_id.ox_id; +} + +/* + * The header for an fcpio request, whether from the firmware or from the + * host driver + */ +struct fcpio_header { + u8 type; /* enum fcpio_type */ + u8 status; /* header status entry */ + u16 _resvd; /* reserved */ + struct fcpio_tag tag; /* header tag */ +}; + +static inline void +fcpio_header_enc(struct fcpio_header *hdr, + u8 type, u8 status, + struct fcpio_tag tag) +{ + hdr->type = type; + hdr->status = status; + hdr->_resvd = 0; + hdr->tag = tag; +} + +static inline void +fcpio_header_dec(struct fcpio_header *hdr, + u8 *type, u8 *status, + struct fcpio_tag *tag) +{ + *type = hdr->type; + *status = hdr->status; + *tag = hdr->tag; +} + +#define CDB_16 16 +#define CDB_32 32 +#define LUN_ADDRESS 8 + +/* + * fcpio_icmnd_16: host -> firmware request + * + * used for sending out an initiator SCSI 16-byte command + */ +struct fcpio_icmnd_16 { + u32 lunmap_id; /* index into lunmap table */ + u8 special_req_flags; /* special exchange request flags */ + u8 _resvd0[3]; /* reserved */ + u32 sgl_cnt; /* scatter-gather list count */ + u32 sense_len; /* sense buffer length */ + u64 sgl_addr; /* scatter-gather list addr */ + u64 sense_addr; /* sense buffer address */ + u8 crn; /* SCSI Command Reference No. */ + u8 pri_ta; /* SCSI Priority and Task attribute */ + u8 _resvd1; /* reserved: should be 0 */ + u8 flags; /* command flags */ + u8 scsi_cdb[CDB_16]; /* SCSI Cmnd Descriptor Block */ + u32 data_len; /* length of data expected */ + u8 lun[LUN_ADDRESS]; /* FC vNIC only: LUN address */ + u8 _resvd2; /* reserved */ + u8 d_id[3]; /* FC vNIC only: Target D_ID */ + u16 mss; /* FC vNIC only: max burst */ + u16 _resvd3; /* reserved */ + u32 r_a_tov; /* FC vNIC only: Res. Alloc Timeout */ + u32 e_d_tov; /* FC vNIC only: Err Detect Timeout */ +}; + +/* + * Special request flags + */ +#define FCPIO_ICMND_SRFLAG_RETRY 0x01 /* Enable Retry handling on exchange */ + +/* + * Priority/Task Attribute settings + */ +#define FCPIO_ICMND_PTA_SIMPLE 0 /* simple task attribute */ +#define FCPIO_ICMND_PTA_HEADQ 1 /* head of queue task attribute */ +#define FCPIO_ICMND_PTA_ORDERED 2 /* ordered task attribute */ +#define FCPIO_ICMND_PTA_ACA 4 /* auto contingent allegiance */ +#define FCPIO_ICMND_PRI_SHIFT 3 /* priority field starts in bit 3 */ + +/* + * Command flags + */ +#define FCPIO_ICMND_RDDATA 0x02 /* read data */ +#define FCPIO_ICMND_WRDATA 0x01 /* write data */ + +/* + * fcpio_icmnd_32: host -> firmware request + * + * used for sending out an initiator SCSI 32-byte command + */ +struct fcpio_icmnd_32 { + u32 lunmap_id; /* index into lunmap table */ + u8 special_req_flags; /* special exchange request flags */ + u8 _resvd0[3]; /* reserved */ + u32 sgl_cnt; /* scatter-gather list count */ + u32 sense_len; /* sense buffer length */ + u64 sgl_addr; /* scatter-gather list addr */ + u64 sense_addr; /* sense buffer address */ + u8 crn; /* SCSI Command Reference No. */ + u8 pri_ta; /* SCSI Priority and Task attribute */ + u8 _resvd1; /* reserved: should be 0 */ + u8 flags; /* command flags */ + u8 scsi_cdb[CDB_32]; /* SCSI Cmnd Descriptor Block */ + u32 data_len; /* length of data expected */ + u8 lun[LUN_ADDRESS]; /* FC vNIC only: LUN address */ + u8 _resvd2; /* reserved */ + u8 d_id[3]; /* FC vNIC only: Target D_ID */ + u16 mss; /* FC vNIC only: max burst */ + u16 _resvd3; /* reserved */ + u32 r_a_tov; /* FC vNIC only: Res. Alloc Timeout */ + u32 e_d_tov; /* FC vNIC only: Error Detect Timeout */ +}; + +/* + * fcpio_itmf: host -> firmware request + * + * used for requesting the firmware to abort a request and/or send out + * a task management function + * + * The t_tag field is only needed when the request type is ABT_TASK. + */ +struct fcpio_itmf { + u32 lunmap_id; /* index into lunmap table */ + u32 tm_req; /* SCSI Task Management request */ + u32 t_tag; /* header tag of fcpio to be aborted */ + u32 _resvd; /* _reserved */ + u8 lun[LUN_ADDRESS]; /* FC vNIC only: LUN address */ + u8 _resvd1; /* reserved */ + u8 d_id[3]; /* FC vNIC only: Target D_ID */ + u32 r_a_tov; /* FC vNIC only: R_A_TOV in msec */ + u32 e_d_tov; /* FC vNIC only: E_D_TOV in msec */ +}; + +/* + * Task Management request + */ +enum fcpio_itmf_tm_req_type { + FCPIO_ITMF_ABT_TASK_TERM = 0x01, /* abort task and terminate */ + FCPIO_ITMF_ABT_TASK, /* abort task and issue abts */ + FCPIO_ITMF_ABT_TASK_SET, /* abort task set */ + FCPIO_ITMF_CLR_TASK_SET, /* clear task set */ + FCPIO_ITMF_LUN_RESET, /* logical unit reset task mgmt */ + FCPIO_ITMF_CLR_ACA, /* Clear ACA condition */ +}; + +/* + * fcpio_tdata: host -> firmware request + * + * used for requesting the firmware to send out a read data transfer for a + * target command + */ +struct fcpio_tdata { + u16 rx_id; /* FC rx_id of target command */ + u16 flags; /* command flags */ + u32 rel_offset; /* data sequence relative offset */ + u32 sgl_cnt; /* scatter-gather list count */ + u32 data_len; /* length of data expected to send */ + u64 sgl_addr; /* scatter-gather list address */ +}; + +/* + * Command flags + */ +#define FCPIO_TDATA_SCSI_RSP 0x01 /* send a scsi resp. after last frame */ + +/* + * fcpio_txrdy: host -> firmware request + * + * used for requesting the firmware to send out a write data transfer for a + * target command + */ +struct fcpio_txrdy { + u16 rx_id; /* FC rx_id of target command */ + u16 _resvd0; /* reserved */ + u32 rel_offset; /* data sequence relative offset */ + u32 sgl_cnt; /* scatter-gather list count */ + u32 data_len; /* length of data expected to send */ + u64 sgl_addr; /* scatter-gather list address */ +}; + +/* + * fcpio_trsp: host -> firmware request + * + * used for requesting the firmware to send out a response for a target + * command + */ +struct fcpio_trsp { + u16 rx_id; /* FC rx_id of target command */ + u16 _resvd0; /* reserved */ + u32 sense_len; /* sense data buffer length */ + u64 sense_addr; /* sense data buffer address */ + u16 _resvd1; /* reserved */ + u8 flags; /* response request flags */ + u8 scsi_status; /* SCSI status */ + u32 residual; /* SCSI data residual value of I/O */ +}; + +/* + * resposnse request flags + */ +#define FCPIO_TRSP_RESID_UNDER 0x08 /* residual is valid and is underflow */ +#define FCPIO_TRSP_RESID_OVER 0x04 /* residual is valid and is overflow */ + +/* + * fcpio_ttmf_ack: host -> firmware response + * + * used by the host to indicate to the firmware it has received and processed + * the target tmf request + */ +struct fcpio_ttmf_ack { + u16 rx_id; /* FC rx_id of target command */ + u16 _resvd0; /* reserved */ + u32 tmf_status; /* SCSI task management status */ +}; + +/* + * fcpio_tabort: host -> firmware request + * + * used by the host to request the firmware to abort a target request that was + * received by the firmware + */ +struct fcpio_tabort { + u16 rx_id; /* rx_id of the target request */ +}; + +/* + * fcpio_reset: host -> firmware request + * + * used by the host to signal a reset of the driver to the firmware + * and to request firmware to clean up all outstanding I/O + */ +struct fcpio_reset { + u32 _resvd; +}; + +enum fcpio_flogi_reg_format_type { + FCPIO_FLOGI_REG_DEF_DEST = 0, /* Use the oui | s_id mac format */ + FCPIO_FLOGI_REG_GW_DEST, /* Use the fixed gateway mac */ +}; + +/* + * fcpio_flogi_reg: host -> firmware request + * + * fc vnic only + * used by the host to notify the firmware of the lif's s_id + * and destination mac address format + */ +struct fcpio_flogi_reg { + u8 format; + u8 s_id[3]; /* FC vNIC only: Source S_ID */ + u8 gateway_mac[ETH_ALEN]; /* Destination gateway mac */ + u16 _resvd; + u32 r_a_tov; /* R_A_TOV in msec */ + u32 e_d_tov; /* E_D_TOV in msec */ +}; + +/* + * fcpio_echo: host -> firmware request + * + * sends a heartbeat echo request to the firmware + */ +struct fcpio_echo { + u32 _resvd; +}; + +/* + * fcpio_lunmap_req: host -> firmware request + * + * scsi vnic only + * sends a request to retrieve the lunmap table for scsi vnics + */ +struct fcpio_lunmap_req { + u64 addr; /* address of the buffer */ + u32 len; /* len of the buffer */ +}; + +/* + * fcpio_flogi_fip_reg: host -> firmware request + * + * fc vnic only + * used by the host to notify the firmware of the lif's s_id + * and destination mac address format + */ +struct fcpio_flogi_fip_reg { + u8 _resvd0; + u8 s_id[3]; /* FC vNIC only: Source S_ID */ + u8 fcf_mac[ETH_ALEN]; /* FCF Target destination mac */ + u16 _resvd1; + u32 r_a_tov; /* R_A_TOV in msec */ + u32 e_d_tov; /* E_D_TOV in msec */ + u8 ha_mac[ETH_ALEN]; /* Host adapter source mac */ + u16 _resvd2; +}; + +/* + * Basic structure for all fcpio structures that are sent from the host to the + * firmware. They are 128 bytes per structure. + */ +#define FCPIO_HOST_REQ_LEN 128 /* expected length of host requests */ + +struct fcpio_host_req { + struct fcpio_header hdr; + + union { + /* + * Defines space needed for request + */ + u8 buf[FCPIO_HOST_REQ_LEN - sizeof(struct fcpio_header)]; + + /* + * Initiator host requests + */ + struct fcpio_icmnd_16 icmnd_16; + struct fcpio_icmnd_32 icmnd_32; + struct fcpio_itmf itmf; + + /* + * Target host requests + */ + struct fcpio_tdata tdata; + struct fcpio_txrdy txrdy; + struct fcpio_trsp trsp; + struct fcpio_ttmf_ack ttmf_ack; + struct fcpio_tabort tabort; + + /* + * Misc requests + */ + struct fcpio_reset reset; + struct fcpio_flogi_reg flogi_reg; + struct fcpio_echo echo; + struct fcpio_lunmap_req lunmap_req; + struct fcpio_flogi_fip_reg flogi_fip_reg; + } u; +}; + +/* + * fcpio_icmnd_cmpl: firmware -> host response + * + * used for sending the host a response to an initiator command + */ +struct fcpio_icmnd_cmpl { + u8 _resvd0[6]; /* reserved */ + u8 flags; /* response flags */ + u8 scsi_status; /* SCSI status */ + u32 residual; /* SCSI data residual length */ + u32 sense_len; /* SCSI sense length */ +}; + +/* + * response flags + */ +#define FCPIO_ICMND_CMPL_RESID_UNDER 0x08 /* resid under and valid */ +#define FCPIO_ICMND_CMPL_RESID_OVER 0x04 /* resid over and valid */ + +/* + * fcpio_itmf_cmpl: firmware -> host response + * + * used for sending the host a response for a itmf request + */ +struct fcpio_itmf_cmpl { + u32 _resvd; /* reserved */ +}; + +/* + * fcpio_tcmnd_16: firmware -> host request + * + * used by the firmware to notify the host of an incoming target SCSI 16-Byte + * request + */ +struct fcpio_tcmnd_16 { + u8 lun[LUN_ADDRESS]; /* FC vNIC only: LUN address */ + u8 crn; /* SCSI Command Reference No. */ + u8 pri_ta; /* SCSI Priority and Task attribute */ + u8 _resvd2; /* reserved: should be 0 */ + u8 flags; /* command flags */ + u8 scsi_cdb[CDB_16]; /* SCSI Cmnd Descriptor Block */ + u32 data_len; /* length of data expected */ + u8 _resvd1; /* reserved */ + u8 s_id[3]; /* FC vNIC only: Source S_ID */ +}; + +/* + * Priority/Task Attribute settings + */ +#define FCPIO_TCMND_PTA_SIMPLE 0 /* simple task attribute */ +#define FCPIO_TCMND_PTA_HEADQ 1 /* head of queue task attribute */ +#define FCPIO_TCMND_PTA_ORDERED 2 /* ordered task attribute */ +#define FCPIO_TCMND_PTA_ACA 4 /* auto contingent allegiance */ +#define FCPIO_TCMND_PRI_SHIFT 3 /* priority field starts in bit 3 */ + +/* + * Command flags + */ +#define FCPIO_TCMND_RDDATA 0x02 /* read data */ +#define FCPIO_TCMND_WRDATA 0x01 /* write data */ + +/* + * fcpio_tcmnd_32: firmware -> host request + * + * used by the firmware to notify the host of an incoming target SCSI 32-Byte + * request + */ +struct fcpio_tcmnd_32 { + u8 lun[LUN_ADDRESS]; /* FC vNIC only: LUN address */ + u8 crn; /* SCSI Command Reference No. */ + u8 pri_ta; /* SCSI Priority and Task attribute */ + u8 _resvd2; /* reserved: should be 0 */ + u8 flags; /* command flags */ + u8 scsi_cdb[CDB_32]; /* SCSI Cmnd Descriptor Block */ + u32 data_len; /* length of data expected */ + u8 _resvd0; /* reserved */ + u8 s_id[3]; /* FC vNIC only: Source S_ID */ +}; + +/* + * fcpio_tdrsp_cmpl: firmware -> host response + * + * used by the firmware to notify the host of a response to a host target + * command + */ +struct fcpio_tdrsp_cmpl { + u16 rx_id; /* rx_id of the target request */ + u16 _resvd0; /* reserved */ +}; + +/* + * fcpio_ttmf: firmware -> host request + * + * used by the firmware to notify the host of an incoming task management + * function request + */ +struct fcpio_ttmf { + u8 _resvd0; /* reserved */ + u8 s_id[3]; /* FC vNIC only: Source S_ID */ + u8 lun[LUN_ADDRESS]; /* FC vNIC only: LUN address */ + u8 crn; /* SCSI Command Reference No. */ + u8 _resvd2[3]; /* reserved */ + u32 tmf_type; /* task management request type */ +}; + +/* + * Task Management request + */ +#define FCPIO_TTMF_CLR_ACA 0x40 /* Clear ACA condition */ +#define FCPIO_TTMF_LUN_RESET 0x10 /* logical unit reset task mgmt */ +#define FCPIO_TTMF_CLR_TASK_SET 0x04 /* clear task set */ +#define FCPIO_TTMF_ABT_TASK_SET 0x02 /* abort task set */ +#define FCPIO_TTMF_ABT_TASK 0x01 /* abort task */ + +/* + * fcpio_tabort_cmpl: firmware -> host response + * + * used by the firmware to respond to a host's tabort request + */ +struct fcpio_tabort_cmpl { + u16 rx_id; /* rx_id of the target request */ + u16 _resvd0; /* reserved */ +}; + +/* + * fcpio_ack: firmware -> host response + * + * used by firmware to notify the host of the last work request received + */ +struct fcpio_ack { + u16 request_out; /* last host entry received */ + u16 _resvd; +}; + +/* + * fcpio_reset_cmpl: firmware -> host response + * + * use by firmware to respond to the host's reset request + */ +struct fcpio_reset_cmpl { + u16 vnic_id; +}; + +/* + * fcpio_flogi_reg_cmpl: firmware -> host response + * + * fc vnic only + * response to the fcpio_flogi_reg request + */ +struct fcpio_flogi_reg_cmpl { + u32 _resvd; +}; + +/* + * fcpio_echo_cmpl: firmware -> host response + * + * response to the fcpio_echo request + */ +struct fcpio_echo_cmpl { + u32 _resvd; +}; + +/* + * fcpio_lunmap_chng: firmware -> host notification + * + * scsi vnic only + * notifies the host that the lunmap tables have changed + */ +struct fcpio_lunmap_chng { + u32 _resvd; +}; + +/* + * fcpio_lunmap_req_cmpl: firmware -> host response + * + * scsi vnic only + * response for lunmap table request from the host + */ +struct fcpio_lunmap_req_cmpl { + u32 _resvd; +}; + +/* + * Basic structure for all fcpio structures that are sent from the firmware to + * the host. They are 64 bytes per structure. + */ +#define FCPIO_FW_REQ_LEN 64 /* expected length of fw requests */ +struct fcpio_fw_req { + struct fcpio_header hdr; + + union { + /* + * Defines space needed for request + */ + u8 buf[FCPIO_FW_REQ_LEN - sizeof(struct fcpio_header)]; + + /* + * Initiator firmware responses + */ + struct fcpio_icmnd_cmpl icmnd_cmpl; + struct fcpio_itmf_cmpl itmf_cmpl; + + /* + * Target firmware new requests + */ + struct fcpio_tcmnd_16 tcmnd_16; + struct fcpio_tcmnd_32 tcmnd_32; + + /* + * Target firmware responses + */ + struct fcpio_tdrsp_cmpl tdrsp_cmpl; + struct fcpio_ttmf ttmf; + struct fcpio_tabort_cmpl tabort_cmpl; + + /* + * Firmware response to work received + */ + struct fcpio_ack ack; + + /* + * Misc requests + */ + struct fcpio_reset_cmpl reset_cmpl; + struct fcpio_flogi_reg_cmpl flogi_reg_cmpl; + struct fcpio_echo_cmpl echo_cmpl; + struct fcpio_lunmap_chng lunmap_chng; + struct fcpio_lunmap_req_cmpl lunmap_req_cmpl; + } u; +}; + +/* + * Access routines to encode and decode the color bit, which is the most + * significant bit of the MSB of the structure + */ +static inline void fcpio_color_enc(struct fcpio_fw_req *fw_req, u8 color) +{ + u8 *c = ((u8 *) fw_req) + sizeof(struct fcpio_fw_req) - 1; + + if (color) + *c |= 0x80; + else + *c &= ~0x80; +} + +static inline void fcpio_color_dec(struct fcpio_fw_req *fw_req, u8 *color) +{ + u8 *c = ((u8 *) fw_req) + sizeof(struct fcpio_fw_req) - 1; + + *color = *c >> 7; + + /* + * Make sure color bit is read from desc *before* other fields + * are read from desc. Hardware guarantees color bit is last + * bit (byte) written. Adding the rmb() prevents the compiler + * and/or CPU from reordering the reads which would potentially + * result in reading stale values. + */ + + rmb(); + +} + +/* + * Lunmap table entry for scsi vnics + */ +#define FCPIO_LUNMAP_TABLE_SIZE 256 +#define FCPIO_FLAGS_LUNMAP_VALID 0x80 +#define FCPIO_FLAGS_BOOT 0x01 +struct fcpio_lunmap_entry { + u8 bus; + u8 target; + u8 lun; + u8 path_cnt; + u16 flags; + u16 update_cnt; +}; + +struct fcpio_lunmap_tbl { + u32 update_cnt; + struct fcpio_lunmap_entry lunmaps[FCPIO_LUNMAP_TABLE_SIZE]; +}; + +#endif /* _FCPIO_H_ */ diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h new file mode 100644 index 000000000000..e4c0a3d7d87b --- /dev/null +++ b/drivers/scsi/fnic/fnic.h @@ -0,0 +1,265 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _FNIC_H_ +#define _FNIC_H_ + +#include +#include +#include +#include +#include "fnic_io.h" +#include "fnic_res.h" +#include "vnic_dev.h" +#include "vnic_wq.h" +#include "vnic_rq.h" +#include "vnic_cq.h" +#include "vnic_wq_copy.h" +#include "vnic_intr.h" +#include "vnic_stats.h" +#include "vnic_scsi.h" + +#define DRV_NAME "fnic" +#define DRV_DESCRIPTION "Cisco FCoE HBA Driver" +#define DRV_VERSION "1.0.0.1121" +#define PFX DRV_NAME ": " +#define DFX DRV_NAME "%d: " + +#define DESC_CLEAN_LOW_WATERMARK 8 +#define FNIC_MAX_IO_REQ 2048 /* scsi_cmnd tag map entries */ +#define FNIC_IO_LOCKS 64 /* IO locks: power of 2 */ +#define FNIC_DFLT_QUEUE_DEPTH 32 +#define FNIC_STATS_RATE_LIMIT 4 /* limit rate at which stats are pulled up */ + +/* + * Tag bits used for special requests. + */ +#define BIT(nr) (1UL << (nr)) +#define FNIC_TAG_ABORT BIT(30) /* tag bit indicating abort */ +#define FNIC_TAG_DEV_RST BIT(29) /* indicates device reset */ +#define FNIC_TAG_MASK (BIT(24) - 1) /* mask for lookup */ +#define FNIC_NO_TAG -1 + +/* + * Usage of the scsi_cmnd scratchpad. + * These fields are locked by the hashed io_req_lock. + */ +#define CMD_SP(Cmnd) ((Cmnd)->SCp.ptr) +#define CMD_STATE(Cmnd) ((Cmnd)->SCp.phase) +#define CMD_ABTS_STATUS(Cmnd) ((Cmnd)->SCp.Message) +#define CMD_LR_STATUS(Cmnd) ((Cmnd)->SCp.have_data_in) +#define CMD_TAG(Cmnd) ((Cmnd)->SCp.sent_command) + +#define FCPIO_INVALID_CODE 0x100 /* hdr_status value unused by firmware */ + +#define FNIC_LUN_RESET_TIMEOUT 10000 /* mSec */ +#define FNIC_HOST_RESET_TIMEOUT 10000 /* mSec */ +#define FNIC_RMDEVICE_TIMEOUT 1000 /* mSec */ +#define FNIC_HOST_RESET_SETTLE_TIME 30 /* Sec */ + +#define FNIC_MAX_FCP_TARGET 256 + +extern unsigned int fnic_log_level; + +#define FNIC_MAIN_LOGGING 0x01 +#define FNIC_FCS_LOGGING 0x02 +#define FNIC_SCSI_LOGGING 0x04 +#define FNIC_ISR_LOGGING 0x08 + +#define FNIC_CHECK_LOGGING(LEVEL, CMD) \ +do { \ + if (unlikely(fnic_log_level & LEVEL)) \ + do { \ + CMD; \ + } while (0); \ +} while (0) + +#define FNIC_MAIN_DBG(kern_level, host, fmt, args...) \ + FNIC_CHECK_LOGGING(FNIC_MAIN_LOGGING, \ + shost_printk(kern_level, host, fmt, ##args);) + +#define FNIC_FCS_DBG(kern_level, host, fmt, args...) \ + FNIC_CHECK_LOGGING(FNIC_FCS_LOGGING, \ + shost_printk(kern_level, host, fmt, ##args);) + +#define FNIC_SCSI_DBG(kern_level, host, fmt, args...) \ + FNIC_CHECK_LOGGING(FNIC_SCSI_LOGGING, \ + shost_printk(kern_level, host, fmt, ##args);) + +#define FNIC_ISR_DBG(kern_level, host, fmt, args...) \ + FNIC_CHECK_LOGGING(FNIC_ISR_LOGGING, \ + shost_printk(kern_level, host, fmt, ##args);) + +extern const char *fnic_state_str[]; + +enum fnic_intx_intr_index { + FNIC_INTX_WQ_RQ_COPYWQ, + FNIC_INTX_ERR, + FNIC_INTX_NOTIFY, + FNIC_INTX_INTR_MAX, +}; + +enum fnic_msix_intr_index { + FNIC_MSIX_RQ, + FNIC_MSIX_WQ, + FNIC_MSIX_WQ_COPY, + FNIC_MSIX_ERR_NOTIFY, + FNIC_MSIX_INTR_MAX, +}; + +struct fnic_msix_entry { + int requested; + char devname[IFNAMSIZ]; + irqreturn_t (*isr)(int, void *); + void *devid; +}; + +enum fnic_state { + FNIC_IN_FC_MODE = 0, + FNIC_IN_FC_TRANS_ETH_MODE, + FNIC_IN_ETH_MODE, + FNIC_IN_ETH_TRANS_FC_MODE, +}; + +#define FNIC_WQ_COPY_MAX 1 +#define FNIC_WQ_MAX 1 +#define FNIC_RQ_MAX 1 +#define FNIC_CQ_MAX (FNIC_WQ_COPY_MAX + FNIC_WQ_MAX + FNIC_RQ_MAX) + +struct mempool; + +/* Per-instance private data structure */ +struct fnic { + struct fc_lport *lport; + struct vnic_dev_bar bar0; + + struct msix_entry msix_entry[FNIC_MSIX_INTR_MAX]; + struct fnic_msix_entry msix[FNIC_MSIX_INTR_MAX]; + + struct vnic_stats *stats; + unsigned long stats_time; /* time of stats update */ + struct vnic_nic_cfg *nic_cfg; + char name[IFNAMSIZ]; + struct timer_list notify_timer; /* used for MSI interrupts */ + + unsigned int err_intr_offset; + unsigned int link_intr_offset; + + unsigned int wq_count; + unsigned int cq_count; + + u32 fcoui_mode:1; /* use fcoui address*/ + u32 vlan_hw_insert:1; /* let hw insert the tag */ + u32 in_remove:1; /* fnic device in removal */ + u32 stop_rx_link_events:1; /* stop proc. rx frames, link events */ + + struct completion *remove_wait; /* device remove thread blocks */ + + struct fc_frame *flogi; + struct fc_frame *flogi_resp; + u16 flogi_oxid; + unsigned long s_id; + enum fnic_state state; + spinlock_t fnic_lock; + + u16 vlan_id; /* VLAN tag including priority */ + u8 mac_addr[ETH_ALEN]; + u8 dest_addr[ETH_ALEN]; + u8 data_src_addr[ETH_ALEN]; + u64 fcp_input_bytes; /* internal statistic */ + u64 fcp_output_bytes; /* internal statistic */ + u32 link_down_cnt; + int link_status; + + struct list_head list; + struct pci_dev *pdev; + struct vnic_fc_config config; + struct vnic_dev *vdev; + unsigned int raw_wq_count; + unsigned int wq_copy_count; + unsigned int rq_count; + int fw_ack_index[FNIC_WQ_COPY_MAX]; + unsigned short fw_ack_recd[FNIC_WQ_COPY_MAX]; + unsigned short wq_copy_desc_low[FNIC_WQ_COPY_MAX]; + unsigned int intr_count; + u32 __iomem *legacy_pba; + struct fnic_host_tag *tags; + mempool_t *io_req_pool; + mempool_t *io_sgl_pool[FNIC_SGL_NUM_CACHES]; + spinlock_t io_req_lock[FNIC_IO_LOCKS]; /* locks for scsi cmnds */ + + struct work_struct link_work; + struct work_struct frame_work; + struct sk_buff_head frame_queue; + + /* copy work queue cache line section */ + ____cacheline_aligned struct vnic_wq_copy wq_copy[FNIC_WQ_COPY_MAX]; + /* completion queue cache line section */ + ____cacheline_aligned struct vnic_cq cq[FNIC_CQ_MAX]; + + spinlock_t wq_copy_lock[FNIC_WQ_COPY_MAX]; + + /* work queue cache line section */ + ____cacheline_aligned struct vnic_wq wq[FNIC_WQ_MAX]; + spinlock_t wq_lock[FNIC_WQ_MAX]; + + /* receive queue cache line section */ + ____cacheline_aligned struct vnic_rq rq[FNIC_RQ_MAX]; + + /* interrupt resource cache line section */ + ____cacheline_aligned struct vnic_intr intr[FNIC_MSIX_INTR_MAX]; +}; + +extern struct workqueue_struct *fnic_event_queue; +extern struct device_attribute *fnic_attrs[]; + +void fnic_clear_intr_mode(struct fnic *fnic); +int fnic_set_intr_mode(struct fnic *fnic); +void fnic_free_intr(struct fnic *fnic); +int fnic_request_intr(struct fnic *fnic); + +int fnic_send(struct fc_lport *, struct fc_frame *); +void fnic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf); +void fnic_handle_frame(struct work_struct *work); +void fnic_handle_link(struct work_struct *work); +int fnic_rq_cmpl_handler(struct fnic *fnic, int); +int fnic_alloc_rq_frame(struct vnic_rq *rq); +void fnic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf); +int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp); + +int fnic_queuecommand(struct scsi_cmnd *, void (*done)(struct scsi_cmnd *)); +int fnic_abort_cmd(struct scsi_cmnd *); +int fnic_device_reset(struct scsi_cmnd *); +int fnic_host_reset(struct scsi_cmnd *); +int fnic_reset(struct Scsi_Host *); +void fnic_scsi_cleanup(struct fc_lport *); +void fnic_scsi_abort_io(struct fc_lport *); +void fnic_empty_scsi_cleanup(struct fc_lport *); +void fnic_exch_mgr_reset(struct fc_lport *, u32, u32); +int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int); +int fnic_wq_cmpl_handler(struct fnic *fnic, int); +int fnic_flogi_reg_handler(struct fnic *fnic); +void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq, + struct fcpio_host_req *desc); +int fnic_fw_reset_handler(struct fnic *fnic); +void fnic_terminate_rport_io(struct fc_rport *); +const char *fnic_state_to_str(unsigned int state); + +void fnic_log_q_error(struct fnic *fnic); +void fnic_handle_link_event(struct fnic *fnic); + +#endif /* _FNIC_H_ */ diff --git a/drivers/scsi/fnic/fnic_attrs.c b/drivers/scsi/fnic/fnic_attrs.c new file mode 100644 index 000000000000..aea0c3becfd4 --- /dev/null +++ b/drivers/scsi/fnic/fnic_attrs.c @@ -0,0 +1,56 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include "fnic.h" + +static ssize_t fnic_show_state(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fc_lport *lp = shost_priv(class_to_shost(dev)); + struct fnic *fnic = lport_priv(lp); + + return snprintf(buf, PAGE_SIZE, "%s\n", fnic_state_str[fnic->state]); +} + +static ssize_t fnic_show_drv_version(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION); +} + +static ssize_t fnic_show_link_state(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fc_lport *lp = shost_priv(class_to_shost(dev)); + + return snprintf(buf, PAGE_SIZE, "%s\n", (lp->link_up) + ? "Link Up" : "Link Down"); +} + +static DEVICE_ATTR(fnic_state, S_IRUGO, fnic_show_state, NULL); +static DEVICE_ATTR(drv_version, S_IRUGO, fnic_show_drv_version, NULL); +static DEVICE_ATTR(link_state, S_IRUGO, fnic_show_link_state, NULL); + +struct device_attribute *fnic_attrs[] = { + &dev_attr_fnic_state, + &dev_attr_drv_version, + &dev_attr_link_state, + NULL, +}; diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c new file mode 100644 index 000000000000..07e6eedb83ce --- /dev/null +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -0,0 +1,742 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fnic_io.h" +#include "fnic.h" +#include "cq_enet_desc.h" +#include "cq_exch_desc.h" + +struct workqueue_struct *fnic_event_queue; + +void fnic_handle_link(struct work_struct *work) +{ + struct fnic *fnic = container_of(work, struct fnic, link_work); + unsigned long flags; + int old_link_status; + u32 old_link_down_cnt; + + spin_lock_irqsave(&fnic->fnic_lock, flags); + + if (fnic->stop_rx_link_events) { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + return; + } + + old_link_down_cnt = fnic->link_down_cnt; + old_link_status = fnic->link_status; + fnic->link_status = vnic_dev_link_status(fnic->vdev); + fnic->link_down_cnt = vnic_dev_link_down_cnt(fnic->vdev); + + if (old_link_status == fnic->link_status) { + if (!fnic->link_status) + /* DOWN -> DOWN */ + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + else { + if (old_link_down_cnt != fnic->link_down_cnt) { + /* UP -> DOWN -> UP */ + fnic->lport->host_stats.link_failure_count++; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, + "link down\n"); + fc_linkdown(fnic->lport); + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, + "link up\n"); + fc_linkup(fnic->lport); + } else + /* UP -> UP */ + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + } + } else if (fnic->link_status) { + /* DOWN -> UP */ + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, "link up\n"); + fc_linkup(fnic->lport); + } else { + /* UP -> DOWN */ + fnic->lport->host_stats.link_failure_count++; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, "link down\n"); + fc_linkdown(fnic->lport); + } + +} + +/* + * This function passes incoming fabric frames to libFC + */ +void fnic_handle_frame(struct work_struct *work) +{ + struct fnic *fnic = container_of(work, struct fnic, frame_work); + struct fc_lport *lp = fnic->lport; + unsigned long flags; + struct sk_buff *skb; + struct fc_frame *fp; + + while ((skb = skb_dequeue(&fnic->frame_queue))) { + + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->stop_rx_link_events) { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + dev_kfree_skb(skb); + return; + } + fp = (struct fc_frame *)skb; + /* if Flogi resp frame, register the address */ + if (fr_flags(fp)) { + vnic_dev_add_addr(fnic->vdev, + fnic->data_src_addr); + fr_flags(fp) = 0; + } + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + fc_exch_recv(lp, lp->emp, fp); + } + +} + +static inline void fnic_import_rq_fc_frame(struct sk_buff *skb, + u32 len, u8 sof, u8 eof) +{ + struct fc_frame *fp = (struct fc_frame *)skb; + + skb_trim(skb, len); + fr_eof(fp) = eof; + fr_sof(fp) = sof; +} + + +static inline int fnic_import_rq_eth_pkt(struct sk_buff *skb, u32 len) +{ + struct fc_frame *fp; + struct ethhdr *eh; + struct vlan_ethhdr *vh; + struct fcoe_hdr *fcoe_hdr; + struct fcoe_crc_eof *ft; + u32 transport_len = 0; + + eh = (struct ethhdr *)skb->data; + vh = (struct vlan_ethhdr *)skb->data; + if (vh->h_vlan_proto == htons(ETH_P_8021Q) && + vh->h_vlan_encapsulated_proto == htons(ETH_P_FCOE)) { + skb_pull(skb, sizeof(struct vlan_ethhdr)); + transport_len += sizeof(struct vlan_ethhdr); + } else if (eh->h_proto == htons(ETH_P_FCOE)) { + transport_len += sizeof(struct ethhdr); + skb_pull(skb, sizeof(struct ethhdr)); + } else + return -1; + + fcoe_hdr = (struct fcoe_hdr *)skb->data; + if (FC_FCOE_DECAPS_VER(fcoe_hdr) != FC_FCOE_VER) + return -1; + + fp = (struct fc_frame *)skb; + fc_frame_init(fp); + fr_sof(fp) = fcoe_hdr->fcoe_sof; + skb_pull(skb, sizeof(struct fcoe_hdr)); + transport_len += sizeof(struct fcoe_hdr); + + ft = (struct fcoe_crc_eof *)(skb->data + len - + transport_len - sizeof(*ft)); + fr_eof(fp) = ft->fcoe_eof; + skb_trim(skb, len - transport_len - sizeof(*ft)); + return 0; +} + +static inline int fnic_handle_flogi_resp(struct fnic *fnic, + struct fc_frame *fp) +{ + u8 mac[ETH_ALEN] = FC_FCOE_FLOGI_MAC; + struct ethhdr *eth_hdr; + struct fc_frame_header *fh; + int ret = 0; + unsigned long flags; + struct fc_frame *old_flogi_resp = NULL; + + fh = (struct fc_frame_header *)fr_hdr(fp); + + spin_lock_irqsave(&fnic->fnic_lock, flags); + + if (fnic->state == FNIC_IN_ETH_MODE) { + + /* + * Check if oxid matches on taking the lock. A new Flogi + * issued by libFC might have changed the fnic cached oxid + */ + if (fnic->flogi_oxid != ntohs(fh->fh_ox_id)) { + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, + "Flogi response oxid not" + " matching cached oxid, dropping frame" + "\n"); + ret = -1; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + dev_kfree_skb_irq(fp_skb(fp)); + goto handle_flogi_resp_end; + } + + /* Drop older cached flogi response frame, cache this frame */ + old_flogi_resp = fnic->flogi_resp; + fnic->flogi_resp = fp; + fnic->flogi_oxid = FC_XID_UNKNOWN; + + /* + * this frame is part of flogi get the src mac addr from this + * frame if the src mac is fcoui based then we mark the + * address mode flag to use fcoui base for dst mac addr + * otherwise we have to store the fcoe gateway addr + */ + eth_hdr = (struct ethhdr *)skb_mac_header(fp_skb(fp)); + memcpy(mac, eth_hdr->h_source, ETH_ALEN); + + if (ntoh24(mac) == FC_FCOE_OUI) + fnic->fcoui_mode = 1; + else { + fnic->fcoui_mode = 0; + memcpy(fnic->dest_addr, mac, ETH_ALEN); + } + + /* + * Except for Flogi frame, all outbound frames from us have the + * Eth Src address as FC_FCOE_OUI"our_sid". Flogi frame uses + * the vnic MAC address as the Eth Src address + */ + fc_fcoe_set_mac(fnic->data_src_addr, fh->fh_d_id); + + /* We get our s_id from the d_id of the flogi resp frame */ + fnic->s_id = ntoh24(fh->fh_d_id); + + /* Change state to reflect transition from Eth to FC mode */ + fnic->state = FNIC_IN_ETH_TRANS_FC_MODE; + + } else { + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, + "Unexpected fnic state %s while" + " processing flogi resp\n", + fnic_state_to_str(fnic->state)); + ret = -1; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + dev_kfree_skb_irq(fp_skb(fp)); + goto handle_flogi_resp_end; + } + + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + /* Drop older cached frame */ + if (old_flogi_resp) + dev_kfree_skb_irq(fp_skb(old_flogi_resp)); + + /* + * send flogi reg request to firmware, this will put the fnic in + * in FC mode + */ + ret = fnic_flogi_reg_handler(fnic); + + if (ret < 0) { + int free_fp = 1; + spin_lock_irqsave(&fnic->fnic_lock, flags); + /* + * free the frame is some other thread is not + * pointing to it + */ + if (fnic->flogi_resp != fp) + free_fp = 0; + else + fnic->flogi_resp = NULL; + + if (fnic->state == FNIC_IN_ETH_TRANS_FC_MODE) + fnic->state = FNIC_IN_ETH_MODE; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + if (free_fp) + dev_kfree_skb_irq(fp_skb(fp)); + } + + handle_flogi_resp_end: + return ret; +} + +/* Returns 1 for a response that matches cached flogi oxid */ +static inline int is_matching_flogi_resp_frame(struct fnic *fnic, + struct fc_frame *fp) +{ + struct fc_frame_header *fh; + int ret = 0; + u32 f_ctl; + + fh = fc_frame_header_get(fp); + f_ctl = ntoh24(fh->fh_f_ctl); + + if (fnic->flogi_oxid == ntohs(fh->fh_ox_id) && + fh->fh_r_ctl == FC_RCTL_ELS_REP && + (f_ctl & (FC_FC_EX_CTX | FC_FC_SEQ_CTX)) == FC_FC_EX_CTX && + fh->fh_type == FC_TYPE_ELS) + ret = 1; + + return ret; +} + +static void fnic_rq_cmpl_frame_recv(struct vnic_rq *rq, struct cq_desc + *cq_desc, struct vnic_rq_buf *buf, + int skipped __attribute__((unused)), + void *opaque) +{ + struct fnic *fnic = vnic_dev_priv(rq->vdev); + struct sk_buff *skb; + struct fc_frame *fp; + unsigned int eth_hdrs_stripped; + u8 type, color, eop, sop, ingress_port, vlan_stripped; + u8 fcoe = 0, fcoe_sof, fcoe_eof; + u8 fcoe_fc_crc_ok = 1, fcoe_enc_error = 0; + u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok; + u8 ipv6, ipv4, ipv4_fragment, rss_type, csum_not_calc; + u8 fcs_ok = 1, packet_error = 0; + u16 q_number, completed_index, bytes_written = 0, vlan, checksum; + u32 rss_hash; + u16 exchange_id, tmpl; + u8 sof = 0; + u8 eof = 0; + u32 fcp_bytes_written = 0; + unsigned long flags; + + pci_unmap_single(fnic->pdev, buf->dma_addr, buf->len, + PCI_DMA_FROMDEVICE); + skb = buf->os_buf; + buf->os_buf = NULL; + + cq_desc_dec(cq_desc, &type, &color, &q_number, &completed_index); + if (type == CQ_DESC_TYPE_RQ_FCP) { + cq_fcp_rq_desc_dec((struct cq_fcp_rq_desc *)cq_desc, + &type, &color, &q_number, &completed_index, + &eop, &sop, &fcoe_fc_crc_ok, &exchange_id, + &tmpl, &fcp_bytes_written, &sof, &eof, + &ingress_port, &packet_error, + &fcoe_enc_error, &fcs_ok, &vlan_stripped, + &vlan); + eth_hdrs_stripped = 1; + + } else if (type == CQ_DESC_TYPE_RQ_ENET) { + cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc, + &type, &color, &q_number, &completed_index, + &ingress_port, &fcoe, &eop, &sop, + &rss_type, &csum_not_calc, &rss_hash, + &bytes_written, &packet_error, + &vlan_stripped, &vlan, &checksum, + &fcoe_sof, &fcoe_fc_crc_ok, + &fcoe_enc_error, &fcoe_eof, + &tcp_udp_csum_ok, &udp, &tcp, + &ipv4_csum_ok, &ipv6, &ipv4, + &ipv4_fragment, &fcs_ok); + eth_hdrs_stripped = 0; + + } else { + /* wrong CQ type*/ + shost_printk(KERN_ERR, fnic->lport->host, + "fnic rq_cmpl wrong cq type x%x\n", type); + goto drop; + } + + if (!fcs_ok || packet_error || !fcoe_fc_crc_ok || fcoe_enc_error) { + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, + "fnic rq_cmpl fcoe x%x fcsok x%x" + " pkterr x%x fcoe_fc_crc_ok x%x, fcoe_enc_err" + " x%x\n", + fcoe, fcs_ok, packet_error, + fcoe_fc_crc_ok, fcoe_enc_error); + goto drop; + } + + if (eth_hdrs_stripped) + fnic_import_rq_fc_frame(skb, fcp_bytes_written, sof, eof); + else if (fnic_import_rq_eth_pkt(skb, bytes_written)) + goto drop; + + fp = (struct fc_frame *)skb; + + /* + * If frame is an ELS response that matches the cached FLOGI OX_ID, + * and is accept, issue flogi_reg_request copy wq request to firmware + * to register the S_ID and determine whether FC_OUI mode or GW mode. + */ + if (is_matching_flogi_resp_frame(fnic, fp)) { + if (!eth_hdrs_stripped) { + if (fc_frame_payload_op(fp) == ELS_LS_ACC) { + fnic_handle_flogi_resp(fnic, fp); + return; + } + /* + * Recd. Flogi reject. No point registering + * with fw, but forward to libFC + */ + goto forward; + } + goto drop; + } + if (!eth_hdrs_stripped) + goto drop; + +forward: + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->stop_rx_link_events) { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + goto drop; + } + /* Use fr_flags to indicate whether succ. flogi resp or not */ + fr_flags(fp) = 0; + fr_dev(fp) = fnic->lport; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + skb_queue_tail(&fnic->frame_queue, skb); + queue_work(fnic_event_queue, &fnic->frame_work); + + return; +drop: + dev_kfree_skb_irq(skb); +} + +static int fnic_rq_cmpl_handler_cont(struct vnic_dev *vdev, + struct cq_desc *cq_desc, u8 type, + u16 q_number, u16 completed_index, + void *opaque) +{ + struct fnic *fnic = vnic_dev_priv(vdev); + + vnic_rq_service(&fnic->rq[q_number], cq_desc, completed_index, + VNIC_RQ_RETURN_DESC, fnic_rq_cmpl_frame_recv, + NULL); + return 0; +} + +int fnic_rq_cmpl_handler(struct fnic *fnic, int rq_work_to_do) +{ + unsigned int tot_rq_work_done = 0, cur_work_done; + unsigned int i; + int err; + + for (i = 0; i < fnic->rq_count; i++) { + cur_work_done = vnic_cq_service(&fnic->cq[i], rq_work_to_do, + fnic_rq_cmpl_handler_cont, + NULL); + if (cur_work_done) { + err = vnic_rq_fill(&fnic->rq[i], fnic_alloc_rq_frame); + if (err) + shost_printk(KERN_ERR, fnic->lport->host, + "fnic_alloc_rq_frame cant alloc" + " frame\n"); + } + tot_rq_work_done += cur_work_done; + } + + return tot_rq_work_done; +} + +/* + * This function is called once at init time to allocate and fill RQ + * buffers. Subsequently, it is called in the interrupt context after RQ + * buffer processing to replenish the buffers in the RQ + */ +int fnic_alloc_rq_frame(struct vnic_rq *rq) +{ + struct fnic *fnic = vnic_dev_priv(rq->vdev); + struct sk_buff *skb; + u16 len; + dma_addr_t pa; + + len = FC_FRAME_HEADROOM + FC_MAX_FRAME + FC_FRAME_TAILROOM; + skb = dev_alloc_skb(len); + if (!skb) { + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, + "Unable to allocate RQ sk_buff\n"); + return -ENOMEM; + } + skb_reset_mac_header(skb); + skb_reset_transport_header(skb); + skb_reset_network_header(skb); + skb_put(skb, len); + pa = pci_map_single(fnic->pdev, skb->data, len, PCI_DMA_FROMDEVICE); + fnic_queue_rq_desc(rq, skb, pa, len); + return 0; +} + +void fnic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf) +{ + struct fc_frame *fp = buf->os_buf; + struct fnic *fnic = vnic_dev_priv(rq->vdev); + + pci_unmap_single(fnic->pdev, buf->dma_addr, buf->len, + PCI_DMA_FROMDEVICE); + + dev_kfree_skb(fp_skb(fp)); + buf->os_buf = NULL; +} + +static inline int is_flogi_frame(struct fc_frame_header *fh) +{ + return fh->fh_r_ctl == FC_RCTL_ELS_REQ && *(u8 *)(fh + 1) == ELS_FLOGI; +} + +int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp) +{ + struct vnic_wq *wq = &fnic->wq[0]; + struct sk_buff *skb; + dma_addr_t pa; + struct ethhdr *eth_hdr; + struct vlan_ethhdr *vlan_hdr; + struct fcoe_hdr *fcoe_hdr; + struct fc_frame_header *fh; + u32 tot_len, eth_hdr_len; + int ret = 0; + unsigned long flags; + + fh = fc_frame_header_get(fp); + skb = fp_skb(fp); + + if (!fnic->vlan_hw_insert) { + eth_hdr_len = sizeof(*vlan_hdr) + sizeof(*fcoe_hdr); + vlan_hdr = (struct vlan_ethhdr *)skb_push(skb, eth_hdr_len); + eth_hdr = (struct ethhdr *)vlan_hdr; + vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q); + vlan_hdr->h_vlan_encapsulated_proto = htons(ETH_P_FCOE); + vlan_hdr->h_vlan_TCI = htons(fnic->vlan_id); + fcoe_hdr = (struct fcoe_hdr *)(vlan_hdr + 1); + } else { + eth_hdr_len = sizeof(*eth_hdr) + sizeof(*fcoe_hdr); + eth_hdr = (struct ethhdr *)skb_push(skb, eth_hdr_len); + eth_hdr->h_proto = htons(ETH_P_FCOE); + fcoe_hdr = (struct fcoe_hdr *)(eth_hdr + 1); + } + + if (is_flogi_frame(fh)) { + fc_fcoe_set_mac(eth_hdr->h_dest, fh->fh_d_id); + memcpy(eth_hdr->h_source, fnic->mac_addr, ETH_ALEN); + } else { + if (fnic->fcoui_mode) + fc_fcoe_set_mac(eth_hdr->h_dest, fh->fh_d_id); + else + memcpy(eth_hdr->h_dest, fnic->dest_addr, ETH_ALEN); + memcpy(eth_hdr->h_source, fnic->data_src_addr, ETH_ALEN); + } + + tot_len = skb->len; + BUG_ON(tot_len % 4); + + memset(fcoe_hdr, 0, sizeof(*fcoe_hdr)); + fcoe_hdr->fcoe_sof = fr_sof(fp); + if (FC_FCOE_VER) + FC_FCOE_ENCAPS_VER(fcoe_hdr, FC_FCOE_VER); + + pa = pci_map_single(fnic->pdev, eth_hdr, tot_len, PCI_DMA_TODEVICE); + + spin_lock_irqsave(&fnic->wq_lock[0], flags); + + if (!vnic_wq_desc_avail(wq)) { + pci_unmap_single(fnic->pdev, pa, + tot_len, PCI_DMA_TODEVICE); + ret = -1; + goto fnic_send_frame_end; + } + + fnic_queue_wq_desc(wq, skb, pa, tot_len, fr_eof(fp), + fnic->vlan_hw_insert, fnic->vlan_id, 1, 1, 1); +fnic_send_frame_end: + spin_unlock_irqrestore(&fnic->wq_lock[0], flags); + + if (ret) + dev_kfree_skb_any(fp_skb(fp)); + + return ret; +} + +/* + * fnic_send + * Routine to send a raw frame + */ +int fnic_send(struct fc_lport *lp, struct fc_frame *fp) +{ + struct fnic *fnic = lport_priv(lp); + struct fc_frame_header *fh; + int ret = 0; + enum fnic_state old_state; + unsigned long flags; + struct fc_frame *old_flogi = NULL; + struct fc_frame *old_flogi_resp = NULL; + + if (fnic->in_remove) { + dev_kfree_skb(fp_skb(fp)); + ret = -1; + goto fnic_send_end; + } + + fh = fc_frame_header_get(fp); + /* if not an Flogi frame, send it out, this is the common case */ + if (!is_flogi_frame(fh)) + return fnic_send_frame(fnic, fp); + + /* Flogi frame, now enter the state machine */ + + spin_lock_irqsave(&fnic->fnic_lock, flags); +again: + /* Get any old cached frames, free them after dropping lock */ + old_flogi = fnic->flogi; + fnic->flogi = NULL; + old_flogi_resp = fnic->flogi_resp; + fnic->flogi_resp = NULL; + + fnic->flogi_oxid = FC_XID_UNKNOWN; + + old_state = fnic->state; + switch (old_state) { + case FNIC_IN_FC_MODE: + case FNIC_IN_ETH_TRANS_FC_MODE: + default: + fnic->state = FNIC_IN_FC_TRANS_ETH_MODE; + vnic_dev_del_addr(fnic->vdev, fnic->data_src_addr); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (old_flogi) { + dev_kfree_skb(fp_skb(old_flogi)); + old_flogi = NULL; + } + if (old_flogi_resp) { + dev_kfree_skb(fp_skb(old_flogi_resp)); + old_flogi_resp = NULL; + } + + ret = fnic_fw_reset_handler(fnic); + + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->state != FNIC_IN_FC_TRANS_ETH_MODE) + goto again; + if (ret) { + fnic->state = old_state; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + dev_kfree_skb(fp_skb(fp)); + goto fnic_send_end; + } + old_flogi = fnic->flogi; + fnic->flogi = fp; + fnic->flogi_oxid = ntohs(fh->fh_ox_id); + old_flogi_resp = fnic->flogi_resp; + fnic->flogi_resp = NULL; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + break; + + case FNIC_IN_FC_TRANS_ETH_MODE: + /* + * A reset is pending with the firmware. Store the flogi + * and its oxid. The transition out of this state happens + * only when Firmware completes the reset, either with + * success or failed. If success, transition to + * FNIC_IN_ETH_MODE, if fail, then transition to + * FNIC_IN_FC_MODE + */ + fnic->flogi = fp; + fnic->flogi_oxid = ntohs(fh->fh_ox_id); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + break; + + case FNIC_IN_ETH_MODE: + /* + * The fw/hw is already in eth mode. Store the oxid, + * and send the flogi frame out. The transition out of this + * state happens only we receive flogi response from the + * network, and the oxid matches the cached oxid when the + * flogi frame was sent out. If they match, then we issue + * a flogi_reg request and transition to state + * FNIC_IN_ETH_TRANS_FC_MODE + */ + fnic->flogi_oxid = ntohs(fh->fh_ox_id); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + ret = fnic_send_frame(fnic, fp); + break; + } + +fnic_send_end: + if (old_flogi) + dev_kfree_skb(fp_skb(old_flogi)); + if (old_flogi_resp) + dev_kfree_skb(fp_skb(old_flogi_resp)); + return ret; +} + +static void fnic_wq_complete_frame_send(struct vnic_wq *wq, + struct cq_desc *cq_desc, + struct vnic_wq_buf *buf, void *opaque) +{ + struct sk_buff *skb = buf->os_buf; + struct fc_frame *fp = (struct fc_frame *)skb; + struct fnic *fnic = vnic_dev_priv(wq->vdev); + + pci_unmap_single(fnic->pdev, buf->dma_addr, + buf->len, PCI_DMA_TODEVICE); + dev_kfree_skb_irq(fp_skb(fp)); + buf->os_buf = NULL; +} + +static int fnic_wq_cmpl_handler_cont(struct vnic_dev *vdev, + struct cq_desc *cq_desc, u8 type, + u16 q_number, u16 completed_index, + void *opaque) +{ + struct fnic *fnic = vnic_dev_priv(vdev); + unsigned long flags; + + spin_lock_irqsave(&fnic->wq_lock[q_number], flags); + vnic_wq_service(&fnic->wq[q_number], cq_desc, completed_index, + fnic_wq_complete_frame_send, NULL); + spin_unlock_irqrestore(&fnic->wq_lock[q_number], flags); + + return 0; +} + +int fnic_wq_cmpl_handler(struct fnic *fnic, int work_to_do) +{ + unsigned int wq_work_done = 0; + unsigned int i; + + for (i = 0; i < fnic->raw_wq_count; i++) { + wq_work_done += vnic_cq_service(&fnic->cq[fnic->rq_count+i], + work_to_do, + fnic_wq_cmpl_handler_cont, + NULL); + } + + return wq_work_done; +} + + +void fnic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf) +{ + struct fc_frame *fp = buf->os_buf; + struct fnic *fnic = vnic_dev_priv(wq->vdev); + + pci_unmap_single(fnic->pdev, buf->dma_addr, + buf->len, PCI_DMA_TODEVICE); + + dev_kfree_skb(fp_skb(fp)); + buf->os_buf = NULL; +} diff --git a/drivers/scsi/fnic/fnic_io.h b/drivers/scsi/fnic/fnic_io.h new file mode 100644 index 000000000000..f0b896988cd5 --- /dev/null +++ b/drivers/scsi/fnic/fnic_io.h @@ -0,0 +1,67 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _FNIC_IO_H_ +#define _FNIC_IO_H_ + +#include + +#define FNIC_DFLT_SG_DESC_CNT 32 +#define FNIC_MAX_SG_DESC_CNT 1024 /* Maximum descriptors per sgl */ +#define FNIC_SG_DESC_ALIGN 16 /* Descriptor address alignment */ + +struct host_sg_desc { + __le64 addr; + __le32 len; + u32 _resvd; +}; + +struct fnic_dflt_sgl_list { + struct host_sg_desc sg_desc[FNIC_DFLT_SG_DESC_CNT]; +}; + +struct fnic_sgl_list { + struct host_sg_desc sg_desc[FNIC_MAX_SG_DESC_CNT]; +}; + +enum fnic_sgl_list_type { + FNIC_SGL_CACHE_DFLT = 0, /* cache with default size sgl */ + FNIC_SGL_CACHE_MAX, /* cache with max size sgl */ + FNIC_SGL_NUM_CACHES /* number of sgl caches */ +}; + +enum fnic_ioreq_state { + FNIC_IOREQ_CMD_PENDING = 0, + FNIC_IOREQ_ABTS_PENDING, + FNIC_IOREQ_ABTS_COMPLETE, + FNIC_IOREQ_CMD_COMPLETE, +}; + +struct fnic_io_req { + struct host_sg_desc *sgl_list; /* sgl list */ + void *sgl_list_alloc; /* sgl list address used for free */ + dma_addr_t sense_buf_pa; /* dma address for sense buffer*/ + dma_addr_t sgl_list_pa; /* dma address for sgl list */ + u16 sgl_cnt; + u8 sgl_type; /* device DMA descriptor list type */ + u8 io_completed:1; /* set to 1 when fw completes IO */ + u32 port_id; /* remote port DID */ + struct completion *abts_done; /* completion for abts */ + struct completion *dr_done; /* completion for device reset */ +}; + +#endif /* _FNIC_IO_H_ */ diff --git a/drivers/scsi/fnic/fnic_isr.c b/drivers/scsi/fnic/fnic_isr.c new file mode 100644 index 000000000000..2b3064828aea --- /dev/null +++ b/drivers/scsi/fnic/fnic_isr.c @@ -0,0 +1,332 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include "vnic_dev.h" +#include "vnic_intr.h" +#include "vnic_stats.h" +#include "fnic_io.h" +#include "fnic.h" + +static irqreturn_t fnic_isr_legacy(int irq, void *data) +{ + struct fnic *fnic = data; + u32 pba; + unsigned long work_done = 0; + + pba = vnic_intr_legacy_pba(fnic->legacy_pba); + if (!pba) + return IRQ_NONE; + + if (pba & (1 << FNIC_INTX_NOTIFY)) { + vnic_intr_return_all_credits(&fnic->intr[FNIC_INTX_NOTIFY]); + fnic_handle_link_event(fnic); + } + + if (pba & (1 << FNIC_INTX_ERR)) { + vnic_intr_return_all_credits(&fnic->intr[FNIC_INTX_ERR]); + fnic_log_q_error(fnic); + } + + if (pba & (1 << FNIC_INTX_WQ_RQ_COPYWQ)) { + work_done += fnic_wq_copy_cmpl_handler(fnic, 8); + work_done += fnic_wq_cmpl_handler(fnic, 4); + work_done += fnic_rq_cmpl_handler(fnic, 4); + + vnic_intr_return_credits(&fnic->intr[FNIC_INTX_WQ_RQ_COPYWQ], + work_done, + 1 /* unmask intr */, + 1 /* reset intr timer */); + } + + return IRQ_HANDLED; +} + +static irqreturn_t fnic_isr_msi(int irq, void *data) +{ + struct fnic *fnic = data; + unsigned long work_done = 0; + + work_done += fnic_wq_copy_cmpl_handler(fnic, 8); + work_done += fnic_wq_cmpl_handler(fnic, 4); + work_done += fnic_rq_cmpl_handler(fnic, 4); + + vnic_intr_return_credits(&fnic->intr[0], + work_done, + 1 /* unmask intr */, + 1 /* reset intr timer */); + + return IRQ_HANDLED; +} + +static irqreturn_t fnic_isr_msix_rq(int irq, void *data) +{ + struct fnic *fnic = data; + unsigned long rq_work_done = 0; + + rq_work_done = fnic_rq_cmpl_handler(fnic, 4); + vnic_intr_return_credits(&fnic->intr[FNIC_MSIX_RQ], + rq_work_done, + 1 /* unmask intr */, + 1 /* reset intr timer */); + + return IRQ_HANDLED; +} + +static irqreturn_t fnic_isr_msix_wq(int irq, void *data) +{ + struct fnic *fnic = data; + unsigned long wq_work_done = 0; + + wq_work_done = fnic_wq_cmpl_handler(fnic, 4); + vnic_intr_return_credits(&fnic->intr[FNIC_MSIX_WQ], + wq_work_done, + 1 /* unmask intr */, + 1 /* reset intr timer */); + return IRQ_HANDLED; +} + +static irqreturn_t fnic_isr_msix_wq_copy(int irq, void *data) +{ + struct fnic *fnic = data; + unsigned long wq_copy_work_done = 0; + + wq_copy_work_done = fnic_wq_copy_cmpl_handler(fnic, 8); + vnic_intr_return_credits(&fnic->intr[FNIC_MSIX_WQ_COPY], + wq_copy_work_done, + 1 /* unmask intr */, + 1 /* reset intr timer */); + return IRQ_HANDLED; +} + +static irqreturn_t fnic_isr_msix_err_notify(int irq, void *data) +{ + struct fnic *fnic = data; + + vnic_intr_return_all_credits(&fnic->intr[FNIC_MSIX_ERR_NOTIFY]); + fnic_log_q_error(fnic); + fnic_handle_link_event(fnic); + + return IRQ_HANDLED; +} + +void fnic_free_intr(struct fnic *fnic) +{ + int i; + + switch (vnic_dev_get_intr_mode(fnic->vdev)) { + case VNIC_DEV_INTR_MODE_INTX: + case VNIC_DEV_INTR_MODE_MSI: + free_irq(fnic->pdev->irq, fnic); + break; + + case VNIC_DEV_INTR_MODE_MSIX: + for (i = 0; i < ARRAY_SIZE(fnic->msix); i++) + if (fnic->msix[i].requested) + free_irq(fnic->msix_entry[i].vector, + fnic->msix[i].devid); + break; + + default: + break; + } +} + +int fnic_request_intr(struct fnic *fnic) +{ + int err = 0; + int i; + + switch (vnic_dev_get_intr_mode(fnic->vdev)) { + + case VNIC_DEV_INTR_MODE_INTX: + err = request_irq(fnic->pdev->irq, &fnic_isr_legacy, + IRQF_SHARED, DRV_NAME, fnic); + break; + + case VNIC_DEV_INTR_MODE_MSI: + err = request_irq(fnic->pdev->irq, &fnic_isr_msi, + 0, fnic->name, fnic); + break; + + case VNIC_DEV_INTR_MODE_MSIX: + + sprintf(fnic->msix[FNIC_MSIX_RQ].devname, + "%.11s-fcs-rq", fnic->name); + fnic->msix[FNIC_MSIX_RQ].isr = fnic_isr_msix_rq; + fnic->msix[FNIC_MSIX_RQ].devid = fnic; + + sprintf(fnic->msix[FNIC_MSIX_WQ].devname, + "%.11s-fcs-wq", fnic->name); + fnic->msix[FNIC_MSIX_WQ].isr = fnic_isr_msix_wq; + fnic->msix[FNIC_MSIX_WQ].devid = fnic; + + sprintf(fnic->msix[FNIC_MSIX_WQ_COPY].devname, + "%.11s-scsi-wq", fnic->name); + fnic->msix[FNIC_MSIX_WQ_COPY].isr = fnic_isr_msix_wq_copy; + fnic->msix[FNIC_MSIX_WQ_COPY].devid = fnic; + + sprintf(fnic->msix[FNIC_MSIX_ERR_NOTIFY].devname, + "%.11s-err-notify", fnic->name); + fnic->msix[FNIC_MSIX_ERR_NOTIFY].isr = + fnic_isr_msix_err_notify; + fnic->msix[FNIC_MSIX_ERR_NOTIFY].devid = fnic; + + for (i = 0; i < ARRAY_SIZE(fnic->msix); i++) { + err = request_irq(fnic->msix_entry[i].vector, + fnic->msix[i].isr, 0, + fnic->msix[i].devname, + fnic->msix[i].devid); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "MSIX: request_irq" + " failed %d\n", err); + fnic_free_intr(fnic); + break; + } + fnic->msix[i].requested = 1; + } + break; + + default: + break; + } + + return err; +} + +int fnic_set_intr_mode(struct fnic *fnic) +{ + unsigned int n = ARRAY_SIZE(fnic->rq); + unsigned int m = ARRAY_SIZE(fnic->wq); + unsigned int o = ARRAY_SIZE(fnic->wq_copy); + unsigned int i; + + /* + * Set interrupt mode (INTx, MSI, MSI-X) depending + * system capabilities. + * + * Try MSI-X first + * + * We need n RQs, m WQs, o Copy WQs, n+m+o CQs, and n+m+o+1 INTRs + * (last INTR is used for WQ/RQ errors and notification area) + */ + + BUG_ON(ARRAY_SIZE(fnic->msix_entry) < n + m + o + 1); + for (i = 0; i < n + m + o + 1; i++) + fnic->msix_entry[i].entry = i; + + if (fnic->rq_count >= n && + fnic->raw_wq_count >= m && + fnic->wq_copy_count >= o && + fnic->cq_count >= n + m + o) { + if (!pci_enable_msix(fnic->pdev, fnic->msix_entry, + n + m + o + 1)) { + fnic->rq_count = n; + fnic->raw_wq_count = m; + fnic->wq_copy_count = o; + fnic->wq_count = m + o; + fnic->cq_count = n + m + o; + fnic->intr_count = n + m + o + 1; + fnic->err_intr_offset = FNIC_MSIX_ERR_NOTIFY; + + FNIC_ISR_DBG(KERN_DEBUG, fnic->lport->host, + "Using MSI-X Interrupts\n"); + vnic_dev_set_intr_mode(fnic->vdev, + VNIC_DEV_INTR_MODE_MSIX); + return 0; + } + } + + /* + * Next try MSI + * We need 1 RQ, 1 WQ, 1 WQ_COPY, 3 CQs, and 1 INTR + */ + if (fnic->rq_count >= 1 && + fnic->raw_wq_count >= 1 && + fnic->wq_copy_count >= 1 && + fnic->cq_count >= 3 && + fnic->intr_count >= 1 && + !pci_enable_msi(fnic->pdev)) { + + fnic->rq_count = 1; + fnic->raw_wq_count = 1; + fnic->wq_copy_count = 1; + fnic->wq_count = 2; + fnic->cq_count = 3; + fnic->intr_count = 1; + fnic->err_intr_offset = 0; + + FNIC_ISR_DBG(KERN_DEBUG, fnic->lport->host, + "Using MSI Interrupts\n"); + vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_MSI); + + return 0; + } + + /* + * Next try INTx + * We need 1 RQ, 1 WQ, 1 WQ_COPY, 3 CQs, and 3 INTRs + * 1 INTR is used for all 3 queues, 1 INTR for queue errors + * 1 INTR for notification area + */ + + if (fnic->rq_count >= 1 && + fnic->raw_wq_count >= 1 && + fnic->wq_copy_count >= 1 && + fnic->cq_count >= 3 && + fnic->intr_count >= 3) { + + fnic->rq_count = 1; + fnic->raw_wq_count = 1; + fnic->wq_copy_count = 1; + fnic->cq_count = 3; + fnic->intr_count = 3; + + FNIC_ISR_DBG(KERN_DEBUG, fnic->lport->host, + "Using Legacy Interrupts\n"); + vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_INTX); + + return 0; + } + + vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN); + + return -EINVAL; +} + +void fnic_clear_intr_mode(struct fnic *fnic) +{ + switch (vnic_dev_get_intr_mode(fnic->vdev)) { + case VNIC_DEV_INTR_MODE_MSIX: + pci_disable_msix(fnic->pdev); + break; + case VNIC_DEV_INTR_MODE_MSI: + pci_disable_msi(fnic->pdev); + break; + default: + break; + } + + vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_INTX); +} + diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c new file mode 100644 index 000000000000..32ef6b87d895 --- /dev/null +++ b/drivers/scsi/fnic/fnic_main.c @@ -0,0 +1,942 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vnic_dev.h" +#include "vnic_intr.h" +#include "vnic_stats.h" +#include "fnic_io.h" +#include "fnic.h" + +#define PCI_DEVICE_ID_CISCO_FNIC 0x0045 + +/* Timer to poll notification area for events. Used for MSI interrupts */ +#define FNIC_NOTIFY_TIMER_PERIOD (2 * HZ) + +static struct kmem_cache *fnic_sgl_cache[FNIC_SGL_NUM_CACHES]; +static struct kmem_cache *fnic_io_req_cache; +LIST_HEAD(fnic_list); +DEFINE_SPINLOCK(fnic_list_lock); + +/* Supported devices by fnic module */ +static struct pci_device_id fnic_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_FNIC) }, + { 0, } +}; + +MODULE_DESCRIPTION(DRV_DESCRIPTION); +MODULE_AUTHOR("Abhijeet Joglekar , " + "Joseph R. Eykholt "); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, fnic_id_table); + +unsigned int fnic_log_level; +module_param(fnic_log_level, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(fnic_log_level, "bit mask of fnic logging levels"); + + +static struct libfc_function_template fnic_transport_template = { + .frame_send = fnic_send, + .fcp_abort_io = fnic_empty_scsi_cleanup, + .fcp_cleanup = fnic_empty_scsi_cleanup, + .exch_mgr_reset = fnic_exch_mgr_reset +}; + +static int fnic_slave_alloc(struct scsi_device *sdev) +{ + struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); + struct fc_lport *lp = shost_priv(sdev->host); + struct fnic *fnic = lport_priv(lp); + + sdev->tagged_supported = 1; + + if (!rport || fc_remote_port_chkready(rport)) + return -ENXIO; + + scsi_activate_tcq(sdev, FNIC_DFLT_QUEUE_DEPTH); + rport->dev_loss_tmo = fnic->config.port_down_timeout / 1000; + + return 0; +} + +static struct scsi_host_template fnic_host_template = { + .module = THIS_MODULE, + .name = DRV_NAME, + .queuecommand = fnic_queuecommand, + .eh_abort_handler = fnic_abort_cmd, + .eh_device_reset_handler = fnic_device_reset, + .eh_host_reset_handler = fnic_host_reset, + .slave_alloc = fnic_slave_alloc, + .change_queue_depth = fc_change_queue_depth, + .change_queue_type = fc_change_queue_type, + .this_id = -1, + .cmd_per_lun = 3, + .can_queue = FNIC_MAX_IO_REQ, + .use_clustering = ENABLE_CLUSTERING, + .sg_tablesize = FNIC_MAX_SG_DESC_CNT, + .max_sectors = 0xffff, + .shost_attrs = fnic_attrs, +}; + +static void fnic_get_host_speed(struct Scsi_Host *shost); +static struct scsi_transport_template *fnic_fc_transport; +static struct fc_host_statistics *fnic_get_stats(struct Scsi_Host *); + +static struct fc_function_template fnic_fc_functions = { + + .show_host_node_name = 1, + .show_host_port_name = 1, + .show_host_supported_classes = 1, + .show_host_supported_fc4s = 1, + .show_host_active_fc4s = 1, + .show_host_maxframe_size = 1, + .show_host_port_id = 1, + .show_host_supported_speeds = 1, + .get_host_speed = fnic_get_host_speed, + .show_host_speed = 1, + .show_host_port_type = 1, + .get_host_port_state = fc_get_host_port_state, + .show_host_port_state = 1, + .show_host_symbolic_name = 1, + .show_rport_maxframe_size = 1, + .show_rport_supported_classes = 1, + .show_host_fabric_name = 1, + .show_starget_node_name = 1, + .show_starget_port_name = 1, + .show_starget_port_id = 1, + .show_rport_dev_loss_tmo = 1, + .issue_fc_host_lip = fnic_reset, + .get_fc_host_stats = fnic_get_stats, + .dd_fcrport_size = sizeof(struct fc_rport_libfc_priv), + .terminate_rport_io = fnic_terminate_rport_io, +}; + +static void fnic_get_host_speed(struct Scsi_Host *shost) +{ + struct fc_lport *lp = shost_priv(shost); + struct fnic *fnic = lport_priv(lp); + u32 port_speed = vnic_dev_port_speed(fnic->vdev); + + /* Add in other values as they get defined in fw */ + switch (port_speed) { + case 10000: + fc_host_speed(shost) = FC_PORTSPEED_10GBIT; + break; + default: + fc_host_speed(shost) = FC_PORTSPEED_10GBIT; + break; + } +} + +static struct fc_host_statistics *fnic_get_stats(struct Scsi_Host *host) +{ + int ret; + struct fc_lport *lp = shost_priv(host); + struct fnic *fnic = lport_priv(lp); + struct fc_host_statistics *stats = &lp->host_stats; + struct vnic_stats *vs; + unsigned long flags; + + if (time_before(jiffies, fnic->stats_time + HZ / FNIC_STATS_RATE_LIMIT)) + return stats; + fnic->stats_time = jiffies; + + spin_lock_irqsave(&fnic->fnic_lock, flags); + ret = vnic_dev_stats_dump(fnic->vdev, &fnic->stats); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (ret) { + FNIC_MAIN_DBG(KERN_DEBUG, fnic->lport->host, + "fnic: Get vnic stats failed" + " 0x%x", ret); + return stats; + } + vs = fnic->stats; + stats->tx_frames = vs->tx.tx_unicast_frames_ok; + stats->tx_words = vs->tx.tx_unicast_bytes_ok / 4; + stats->rx_frames = vs->rx.rx_unicast_frames_ok; + stats->rx_words = vs->rx.rx_unicast_bytes_ok / 4; + stats->error_frames = vs->tx.tx_errors + vs->rx.rx_errors; + stats->dumped_frames = vs->tx.tx_drops + vs->rx.rx_drop; + stats->invalid_crc_count = vs->rx.rx_crc_errors; + stats->seconds_since_last_reset = (jiffies - lp->boot_time) / HZ; + stats->fcp_input_megabytes = div_u64(fnic->fcp_input_bytes, 1000000); + stats->fcp_output_megabytes = div_u64(fnic->fcp_output_bytes, 1000000); + + return stats; +} + +void fnic_log_q_error(struct fnic *fnic) +{ + unsigned int i; + u32 error_status; + + for (i = 0; i < fnic->raw_wq_count; i++) { + error_status = ioread32(&fnic->wq[i].ctrl->error_status); + if (error_status) + shost_printk(KERN_ERR, fnic->lport->host, + "WQ[%d] error_status" + " %d\n", i, error_status); + } + + for (i = 0; i < fnic->rq_count; i++) { + error_status = ioread32(&fnic->rq[i].ctrl->error_status); + if (error_status) + shost_printk(KERN_ERR, fnic->lport->host, + "RQ[%d] error_status" + " %d\n", i, error_status); + } + + for (i = 0; i < fnic->wq_copy_count; i++) { + error_status = ioread32(&fnic->wq_copy[i].ctrl->error_status); + if (error_status) + shost_printk(KERN_ERR, fnic->lport->host, + "CWQ[%d] error_status" + " %d\n", i, error_status); + } +} + +void fnic_handle_link_event(struct fnic *fnic) +{ + unsigned long flags; + + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->stop_rx_link_events) { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + return; + } + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + queue_work(fnic_event_queue, &fnic->link_work); + +} + +static int fnic_notify_set(struct fnic *fnic) +{ + int err; + + switch (vnic_dev_get_intr_mode(fnic->vdev)) { + case VNIC_DEV_INTR_MODE_INTX: + err = vnic_dev_notify_set(fnic->vdev, FNIC_INTX_NOTIFY); + break; + case VNIC_DEV_INTR_MODE_MSI: + err = vnic_dev_notify_set(fnic->vdev, -1); + break; + case VNIC_DEV_INTR_MODE_MSIX: + err = vnic_dev_notify_set(fnic->vdev, FNIC_MSIX_ERR_NOTIFY); + break; + default: + shost_printk(KERN_ERR, fnic->lport->host, + "Interrupt mode should be set up" + " before devcmd notify set %d\n", + vnic_dev_get_intr_mode(fnic->vdev)); + err = -1; + break; + } + + return err; +} + +static void fnic_notify_timer(unsigned long data) +{ + struct fnic *fnic = (struct fnic *)data; + + fnic_handle_link_event(fnic); + mod_timer(&fnic->notify_timer, + round_jiffies(jiffies + FNIC_NOTIFY_TIMER_PERIOD)); +} + +static void fnic_notify_timer_start(struct fnic *fnic) +{ + switch (vnic_dev_get_intr_mode(fnic->vdev)) { + case VNIC_DEV_INTR_MODE_MSI: + /* + * Schedule first timeout immediately. The driver is + * initiatialized and ready to look for link up notification + */ + mod_timer(&fnic->notify_timer, jiffies); + break; + default: + /* Using intr for notification for INTx/MSI-X */ + break; + }; +} + +static int fnic_dev_wait(struct vnic_dev *vdev, + int (*start)(struct vnic_dev *, int), + int (*finished)(struct vnic_dev *, int *), + int arg) +{ + unsigned long time; + int done; + int err; + + err = start(vdev, arg); + if (err) + return err; + + /* Wait for func to complete...2 seconds max */ + time = jiffies + (HZ * 2); + do { + err = finished(vdev, &done); + if (err) + return err; + if (done) + return 0; + schedule_timeout_uninterruptible(HZ / 10); + } while (time_after(time, jiffies)); + + return -ETIMEDOUT; +} + +static int fnic_cleanup(struct fnic *fnic) +{ + unsigned int i; + int err; + unsigned long flags; + struct fc_frame *flogi = NULL; + struct fc_frame *flogi_resp = NULL; + + vnic_dev_disable(fnic->vdev); + for (i = 0; i < fnic->intr_count; i++) + vnic_intr_mask(&fnic->intr[i]); + + for (i = 0; i < fnic->rq_count; i++) { + err = vnic_rq_disable(&fnic->rq[i]); + if (err) + return err; + } + for (i = 0; i < fnic->raw_wq_count; i++) { + err = vnic_wq_disable(&fnic->wq[i]); + if (err) + return err; + } + for (i = 0; i < fnic->wq_copy_count; i++) { + err = vnic_wq_copy_disable(&fnic->wq_copy[i]); + if (err) + return err; + } + + /* Clean up completed IOs and FCS frames */ + fnic_wq_copy_cmpl_handler(fnic, -1); + fnic_wq_cmpl_handler(fnic, -1); + fnic_rq_cmpl_handler(fnic, -1); + + /* Clean up the IOs and FCS frames that have not completed */ + for (i = 0; i < fnic->raw_wq_count; i++) + vnic_wq_clean(&fnic->wq[i], fnic_free_wq_buf); + for (i = 0; i < fnic->rq_count; i++) + vnic_rq_clean(&fnic->rq[i], fnic_free_rq_buf); + for (i = 0; i < fnic->wq_copy_count; i++) + vnic_wq_copy_clean(&fnic->wq_copy[i], + fnic_wq_copy_cleanup_handler); + + for (i = 0; i < fnic->cq_count; i++) + vnic_cq_clean(&fnic->cq[i]); + for (i = 0; i < fnic->intr_count; i++) + vnic_intr_clean(&fnic->intr[i]); + + /* + * Remove cached flogi and flogi resp frames if any + * These frames are not in any queue, and therefore queue + * cleanup does not clean them. So clean them explicitly + */ + spin_lock_irqsave(&fnic->fnic_lock, flags); + flogi = fnic->flogi; + fnic->flogi = NULL; + flogi_resp = fnic->flogi_resp; + fnic->flogi_resp = NULL; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (flogi) + dev_kfree_skb(fp_skb(flogi)); + + if (flogi_resp) + dev_kfree_skb(fp_skb(flogi_resp)); + + mempool_destroy(fnic->io_req_pool); + for (i = 0; i < FNIC_SGL_NUM_CACHES; i++) + mempool_destroy(fnic->io_sgl_pool[i]); + + return 0; +} + +static void fnic_iounmap(struct fnic *fnic) +{ + if (fnic->bar0.vaddr) + iounmap(fnic->bar0.vaddr); +} + +/* + * Allocate element for mempools requiring GFP_DMA flag. + * Otherwise, checks in kmem_flagcheck() hit BUG_ON(). + */ +static void *fnic_alloc_slab_dma(gfp_t gfp_mask, void *pool_data) +{ + struct kmem_cache *mem = pool_data; + + return kmem_cache_alloc(mem, gfp_mask | GFP_ATOMIC | GFP_DMA); +} + +static int __devinit fnic_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct Scsi_Host *host; + struct fc_lport *lp; + struct fnic *fnic; + mempool_t *pool; + int err; + int i; + unsigned long flags; + + /* + * Allocate SCSI Host and set up association between host, + * local port, and fnic + */ + host = scsi_host_alloc(&fnic_host_template, + sizeof(struct fc_lport) + sizeof(struct fnic)); + if (!host) { + printk(KERN_ERR PFX "Unable to alloc SCSI host\n"); + err = -ENOMEM; + goto err_out; + } + lp = shost_priv(host); + lp->host = host; + fnic = lport_priv(lp); + fnic->lport = lp; + + snprintf(fnic->name, sizeof(fnic->name) - 1, "%s%d", DRV_NAME, + host->host_no); + + host->transportt = fnic_fc_transport; + + err = scsi_init_shared_tag_map(host, FNIC_MAX_IO_REQ); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Unable to alloc shared tag map\n"); + goto err_out_free_hba; + } + + /* Setup PCI resources */ + pci_set_drvdata(pdev, fnic); + + fnic->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Cannot enable PCI device, aborting.\n"); + goto err_out_free_hba; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Cannot enable PCI resources, aborting\n"); + goto err_out_disable_device; + } + + pci_set_master(pdev); + + /* Query PCI controller on system for DMA addressing + * limitation for the device. Try 40-bit first, and + * fail to 32-bit. + */ + err = pci_set_dma_mask(pdev, DMA_40BIT_MASK); + if (err) { + err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "No usable DMA configuration " + "aborting\n"); + goto err_out_release_regions; + } + err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Unable to obtain 32-bit DMA " + "for consistent allocations, aborting.\n"); + goto err_out_release_regions; + } + } else { + err = pci_set_consistent_dma_mask(pdev, DMA_40BIT_MASK); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Unable to obtain 40-bit DMA " + "for consistent allocations, aborting.\n"); + goto err_out_release_regions; + } + } + + /* Map vNIC resources from BAR0 */ + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + shost_printk(KERN_ERR, fnic->lport->host, + "BAR0 not memory-map'able, aborting.\n"); + err = -ENODEV; + goto err_out_release_regions; + } + + fnic->bar0.vaddr = pci_iomap(pdev, 0, 0); + fnic->bar0.bus_addr = pci_resource_start(pdev, 0); + fnic->bar0.len = pci_resource_len(pdev, 0); + + if (!fnic->bar0.vaddr) { + shost_printk(KERN_ERR, fnic->lport->host, + "Cannot memory-map BAR0 res hdr, " + "aborting.\n"); + err = -ENODEV; + goto err_out_release_regions; + } + + fnic->vdev = vnic_dev_register(NULL, fnic, pdev, &fnic->bar0); + if (!fnic->vdev) { + shost_printk(KERN_ERR, fnic->lport->host, + "vNIC registration failed, " + "aborting.\n"); + err = -ENODEV; + goto err_out_iounmap; + } + + err = fnic_dev_wait(fnic->vdev, vnic_dev_open, + vnic_dev_open_done, 0); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "vNIC dev open failed, aborting.\n"); + goto err_out_vnic_unregister; + } + + err = vnic_dev_init(fnic->vdev, 0); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "vNIC dev init failed, aborting.\n"); + goto err_out_dev_close; + } + + err = vnic_dev_mac_addr(fnic->vdev, fnic->mac_addr); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "vNIC get MAC addr failed \n"); + goto err_out_dev_close; + } + + /* Get vNIC configuration */ + err = fnic_get_vnic_config(fnic); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Get vNIC configuration failed, " + "aborting.\n"); + goto err_out_dev_close; + } + host->max_lun = fnic->config.luns_per_tgt; + host->max_id = FNIC_MAX_FCP_TARGET; + + fnic_get_res_counts(fnic); + + err = fnic_set_intr_mode(fnic); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Failed to set intr mode, " + "aborting.\n"); + goto err_out_dev_close; + } + + err = fnic_request_intr(fnic); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Unable to request irq.\n"); + goto err_out_clear_intr; + } + + err = fnic_alloc_vnic_resources(fnic); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Failed to alloc vNIC resources, " + "aborting.\n"); + goto err_out_free_intr; + } + + + /* initialize all fnic locks */ + spin_lock_init(&fnic->fnic_lock); + + for (i = 0; i < FNIC_WQ_MAX; i++) + spin_lock_init(&fnic->wq_lock[i]); + + for (i = 0; i < FNIC_WQ_COPY_MAX; i++) { + spin_lock_init(&fnic->wq_copy_lock[i]); + fnic->wq_copy_desc_low[i] = DESC_CLEAN_LOW_WATERMARK; + fnic->fw_ack_recd[i] = 0; + fnic->fw_ack_index[i] = -1; + } + + for (i = 0; i < FNIC_IO_LOCKS; i++) + spin_lock_init(&fnic->io_req_lock[i]); + + fnic->io_req_pool = mempool_create_slab_pool(2, fnic_io_req_cache); + if (!fnic->io_req_pool) + goto err_out_free_resources; + + pool = mempool_create(2, fnic_alloc_slab_dma, mempool_free_slab, + fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]); + if (!pool) + goto err_out_free_ioreq_pool; + fnic->io_sgl_pool[FNIC_SGL_CACHE_DFLT] = pool; + + pool = mempool_create(2, fnic_alloc_slab_dma, mempool_free_slab, + fnic_sgl_cache[FNIC_SGL_CACHE_MAX]); + if (!pool) + goto err_out_free_dflt_pool; + fnic->io_sgl_pool[FNIC_SGL_CACHE_MAX] = pool; + + /* setup vlan config, hw inserts vlan header */ + fnic->vlan_hw_insert = 1; + fnic->vlan_id = 0; + + fnic->flogi_oxid = FC_XID_UNKNOWN; + fnic->flogi = NULL; + fnic->flogi_resp = NULL; + fnic->state = FNIC_IN_FC_MODE; + + /* Enable hardware stripping of vlan header on ingress */ + fnic_set_nic_config(fnic, 0, 0, 0, 0, 0, 0, 1); + + /* Setup notification buffer area */ + err = fnic_notify_set(fnic); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Failed to alloc notify buffer, aborting.\n"); + goto err_out_free_max_pool; + } + + /* Setup notify timer when using MSI interrupts */ + if (vnic_dev_get_intr_mode(fnic->vdev) == VNIC_DEV_INTR_MODE_MSI) + setup_timer(&fnic->notify_timer, + fnic_notify_timer, (unsigned long)fnic); + + /* allocate RQ buffers and post them to RQ*/ + for (i = 0; i < fnic->rq_count; i++) { + err = vnic_rq_fill(&fnic->rq[i], fnic_alloc_rq_frame); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "fnic_alloc_rq_frame can't alloc " + "frame\n"); + goto err_out_free_rq_buf; + } + } + + /* + * Initialization done with PCI system, hardware, firmware. + * Add host to SCSI + */ + err = scsi_add_host(lp->host, &pdev->dev); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "fnic: scsi_add_host failed...exiting\n"); + goto err_out_free_rq_buf; + } + + /* Start local port initiatialization */ + + lp->link_up = 0; + lp->tt = fnic_transport_template; + + lp->emp = fc_exch_mgr_alloc(lp, FC_CLASS_3, + FCPIO_HOST_EXCH_RANGE_START, + FCPIO_HOST_EXCH_RANGE_END); + if (!lp->emp) { + err = -ENOMEM; + goto err_out_remove_scsi_host; + } + + lp->max_retry_count = fnic->config.flogi_retries; + lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS | + FCP_SPPF_CONF_COMPL); + if (fnic->config.flags & VFCF_FCP_SEQ_LVL_ERR) + lp->service_params |= FCP_SPPF_RETRY; + + lp->boot_time = jiffies; + lp->e_d_tov = fnic->config.ed_tov; + lp->r_a_tov = fnic->config.ra_tov; + lp->link_supported_speeds = FC_PORTSPEED_10GBIT; + fc_set_wwnn(lp, fnic->config.node_wwn); + fc_set_wwpn(lp, fnic->config.port_wwn); + + fc_exch_init(lp); + fc_lport_init(lp); + fc_elsct_init(lp); + fc_rport_init(lp); + fc_disc_init(lp); + + fc_lport_config(lp); + + if (fc_set_mfs(lp, fnic->config.maxdatafieldsize + + sizeof(struct fc_frame_header))) { + err = -EINVAL; + goto err_out_free_exch_mgr; + } + fc_host_maxframe_size(lp->host) = lp->mfs; + + sprintf(fc_host_symbolic_name(lp->host), + DRV_NAME " v" DRV_VERSION " over %s", fnic->name); + + spin_lock_irqsave(&fnic_list_lock, flags); + list_add_tail(&fnic->list, &fnic_list); + spin_unlock_irqrestore(&fnic_list_lock, flags); + + INIT_WORK(&fnic->link_work, fnic_handle_link); + INIT_WORK(&fnic->frame_work, fnic_handle_frame); + skb_queue_head_init(&fnic->frame_queue); + + /* Enable all queues */ + for (i = 0; i < fnic->raw_wq_count; i++) + vnic_wq_enable(&fnic->wq[i]); + for (i = 0; i < fnic->rq_count; i++) + vnic_rq_enable(&fnic->rq[i]); + for (i = 0; i < fnic->wq_copy_count; i++) + vnic_wq_copy_enable(&fnic->wq_copy[i]); + + fc_fabric_login(lp); + + vnic_dev_enable(fnic->vdev); + for (i = 0; i < fnic->intr_count; i++) + vnic_intr_unmask(&fnic->intr[i]); + + fnic_notify_timer_start(fnic); + + return 0; + +err_out_free_exch_mgr: + fc_exch_mgr_free(lp->emp); +err_out_remove_scsi_host: + fc_remove_host(fnic->lport->host); + scsi_remove_host(fnic->lport->host); +err_out_free_rq_buf: + for (i = 0; i < fnic->rq_count; i++) + vnic_rq_clean(&fnic->rq[i], fnic_free_rq_buf); + vnic_dev_notify_unset(fnic->vdev); +err_out_free_max_pool: + mempool_destroy(fnic->io_sgl_pool[FNIC_SGL_CACHE_MAX]); +err_out_free_dflt_pool: + mempool_destroy(fnic->io_sgl_pool[FNIC_SGL_CACHE_DFLT]); +err_out_free_ioreq_pool: + mempool_destroy(fnic->io_req_pool); +err_out_free_resources: + fnic_free_vnic_resources(fnic); +err_out_free_intr: + fnic_free_intr(fnic); +err_out_clear_intr: + fnic_clear_intr_mode(fnic); +err_out_dev_close: + vnic_dev_close(fnic->vdev); +err_out_vnic_unregister: + vnic_dev_unregister(fnic->vdev); +err_out_iounmap: + fnic_iounmap(fnic); +err_out_release_regions: + pci_release_regions(pdev); +err_out_disable_device: + pci_disable_device(pdev); +err_out_free_hba: + scsi_host_put(lp->host); +err_out: + return err; +} + +static void __devexit fnic_remove(struct pci_dev *pdev) +{ + struct fnic *fnic = pci_get_drvdata(pdev); + unsigned long flags; + + /* + * Mark state so that the workqueue thread stops forwarding + * received frames and link events to the local port. ISR and + * other threads that can queue work items will also stop + * creating work items on the fnic workqueue + */ + spin_lock_irqsave(&fnic->fnic_lock, flags); + fnic->stop_rx_link_events = 1; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (vnic_dev_get_intr_mode(fnic->vdev) == VNIC_DEV_INTR_MODE_MSI) + del_timer_sync(&fnic->notify_timer); + + /* + * Flush the fnic event queue. After this call, there should + * be no event queued for this fnic device in the workqueue + */ + flush_workqueue(fnic_event_queue); + skb_queue_purge(&fnic->frame_queue); + + /* + * Log off the fabric. This stops all remote ports, dns port, + * logs off the fabric. This flushes all rport, disc, lport work + * before returning + */ + fc_fabric_logoff(fnic->lport); + + spin_lock_irqsave(&fnic->fnic_lock, flags); + fnic->in_remove = 1; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + fc_lport_destroy(fnic->lport); + + /* + * This stops the fnic device, masks all interrupts. Completed + * CQ entries are drained. Posted WQ/RQ/Copy-WQ entries are + * cleaned up + */ + fnic_cleanup(fnic); + + BUG_ON(!skb_queue_empty(&fnic->frame_queue)); + + spin_lock_irqsave(&fnic_list_lock, flags); + list_del(&fnic->list); + spin_unlock_irqrestore(&fnic_list_lock, flags); + + fc_remove_host(fnic->lport->host); + scsi_remove_host(fnic->lport->host); + fc_exch_mgr_free(fnic->lport->emp); + vnic_dev_notify_unset(fnic->vdev); + fnic_free_vnic_resources(fnic); + fnic_free_intr(fnic); + fnic_clear_intr_mode(fnic); + vnic_dev_close(fnic->vdev); + vnic_dev_unregister(fnic->vdev); + fnic_iounmap(fnic); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + scsi_host_put(fnic->lport->host); +} + +static struct pci_driver fnic_driver = { + .name = DRV_NAME, + .id_table = fnic_id_table, + .probe = fnic_probe, + .remove = __devexit_p(fnic_remove), +}; + +static int __init fnic_init_module(void) +{ + size_t len; + int err = 0; + + printk(KERN_INFO PFX "%s, ver %s\n", DRV_DESCRIPTION, DRV_VERSION); + + /* Create a cache for allocation of default size sgls */ + len = sizeof(struct fnic_dflt_sgl_list); + fnic_sgl_cache[FNIC_SGL_CACHE_DFLT] = kmem_cache_create + ("fnic_sgl_dflt", len + FNIC_SG_DESC_ALIGN, FNIC_SG_DESC_ALIGN, + SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, + NULL); + if (!fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]) { + printk(KERN_ERR PFX "failed to create fnic dflt sgl slab\n"); + err = -ENOMEM; + goto err_create_fnic_sgl_slab_dflt; + } + + /* Create a cache for allocation of max size sgls*/ + len = sizeof(struct fnic_sgl_list); + fnic_sgl_cache[FNIC_SGL_CACHE_MAX] = kmem_cache_create + ("fnic_sgl_max", len + FNIC_SG_DESC_ALIGN, FNIC_SG_DESC_ALIGN, + SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, + NULL); + if (!fnic_sgl_cache[FNIC_SGL_CACHE_MAX]) { + printk(KERN_ERR PFX "failed to create fnic max sgl slab\n"); + err = -ENOMEM; + goto err_create_fnic_sgl_slab_max; + } + + /* Create a cache of io_req structs for use via mempool */ + fnic_io_req_cache = kmem_cache_create("fnic_io_req", + sizeof(struct fnic_io_req), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!fnic_io_req_cache) { + printk(KERN_ERR PFX "failed to create fnic io_req slab\n"); + err = -ENOMEM; + goto err_create_fnic_ioreq_slab; + } + + fnic_event_queue = create_singlethread_workqueue("fnic_event_wq"); + if (!fnic_event_queue) { + printk(KERN_ERR PFX "fnic work queue create failed\n"); + err = -ENOMEM; + goto err_create_fnic_workq; + } + + spin_lock_init(&fnic_list_lock); + INIT_LIST_HEAD(&fnic_list); + + fnic_fc_transport = fc_attach_transport(&fnic_fc_functions); + if (!fnic_fc_transport) { + printk(KERN_ERR PFX "fc_attach_transport error\n"); + err = -ENOMEM; + goto err_fc_transport; + } + + /* register the driver with PCI system */ + err = pci_register_driver(&fnic_driver); + if (err < 0) { + printk(KERN_ERR PFX "pci register error\n"); + goto err_pci_register; + } + return err; + +err_pci_register: + fc_release_transport(fnic_fc_transport); +err_fc_transport: + destroy_workqueue(fnic_event_queue); +err_create_fnic_workq: + kmem_cache_destroy(fnic_io_req_cache); +err_create_fnic_ioreq_slab: + kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_MAX]); +err_create_fnic_sgl_slab_max: + kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]); +err_create_fnic_sgl_slab_dflt: + return err; +} + +static void __exit fnic_cleanup_module(void) +{ + pci_unregister_driver(&fnic_driver); + destroy_workqueue(fnic_event_queue); + kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_MAX]); + kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]); + kmem_cache_destroy(fnic_io_req_cache); + fc_release_transport(fnic_fc_transport); +} + +module_init(fnic_init_module); +module_exit(fnic_cleanup_module); + diff --git a/drivers/scsi/fnic/fnic_res.c b/drivers/scsi/fnic/fnic_res.c new file mode 100644 index 000000000000..7ba61ec715d2 --- /dev/null +++ b/drivers/scsi/fnic/fnic_res.c @@ -0,0 +1,444 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include "wq_enet_desc.h" +#include "rq_enet_desc.h" +#include "cq_enet_desc.h" +#include "vnic_resource.h" +#include "vnic_dev.h" +#include "vnic_wq.h" +#include "vnic_rq.h" +#include "vnic_cq.h" +#include "vnic_intr.h" +#include "vnic_stats.h" +#include "vnic_nic.h" +#include "fnic.h" + +int fnic_get_vnic_config(struct fnic *fnic) +{ + struct vnic_fc_config *c = &fnic->config; + int err; + +#define GET_CONFIG(m) \ + do { \ + err = vnic_dev_spec(fnic->vdev, \ + offsetof(struct vnic_fc_config, m), \ + sizeof(c->m), &c->m); \ + if (err) { \ + shost_printk(KERN_ERR, fnic->lport->host, \ + "Error getting %s, %d\n", #m, \ + err); \ + return err; \ + } \ + } while (0); + + GET_CONFIG(node_wwn); + GET_CONFIG(port_wwn); + GET_CONFIG(wq_enet_desc_count); + GET_CONFIG(wq_copy_desc_count); + GET_CONFIG(rq_desc_count); + GET_CONFIG(maxdatafieldsize); + GET_CONFIG(ed_tov); + GET_CONFIG(ra_tov); + GET_CONFIG(intr_timer); + GET_CONFIG(intr_timer_type); + GET_CONFIG(flags); + GET_CONFIG(flogi_retries); + GET_CONFIG(flogi_timeout); + GET_CONFIG(plogi_retries); + GET_CONFIG(plogi_timeout); + GET_CONFIG(io_throttle_count); + GET_CONFIG(link_down_timeout); + GET_CONFIG(port_down_timeout); + GET_CONFIG(port_down_io_retries); + GET_CONFIG(luns_per_tgt); + + c->wq_enet_desc_count = + min_t(u32, VNIC_FNIC_WQ_DESCS_MAX, + max_t(u32, VNIC_FNIC_WQ_DESCS_MIN, + c->wq_enet_desc_count)); + c->wq_enet_desc_count = ALIGN(c->wq_enet_desc_count, 16); + + c->wq_copy_desc_count = + min_t(u32, VNIC_FNIC_WQ_COPY_DESCS_MAX, + max_t(u32, VNIC_FNIC_WQ_COPY_DESCS_MIN, + c->wq_copy_desc_count)); + c->wq_copy_desc_count = ALIGN(c->wq_copy_desc_count, 16); + + c->rq_desc_count = + min_t(u32, VNIC_FNIC_RQ_DESCS_MAX, + max_t(u32, VNIC_FNIC_RQ_DESCS_MIN, + c->rq_desc_count)); + c->rq_desc_count = ALIGN(c->rq_desc_count, 16); + + c->maxdatafieldsize = + min_t(u16, VNIC_FNIC_MAXDATAFIELDSIZE_MAX, + max_t(u16, VNIC_FNIC_MAXDATAFIELDSIZE_MIN, + c->maxdatafieldsize)); + c->ed_tov = + min_t(u32, VNIC_FNIC_EDTOV_MAX, + max_t(u32, VNIC_FNIC_EDTOV_MIN, + c->ed_tov)); + + c->ra_tov = + min_t(u32, VNIC_FNIC_RATOV_MAX, + max_t(u32, VNIC_FNIC_RATOV_MIN, + c->ra_tov)); + + c->flogi_retries = + min_t(u32, VNIC_FNIC_FLOGI_RETRIES_MAX, c->flogi_retries); + + c->flogi_timeout = + min_t(u32, VNIC_FNIC_FLOGI_TIMEOUT_MAX, + max_t(u32, VNIC_FNIC_FLOGI_TIMEOUT_MIN, + c->flogi_timeout)); + + c->plogi_retries = + min_t(u32, VNIC_FNIC_PLOGI_RETRIES_MAX, c->plogi_retries); + + c->plogi_timeout = + min_t(u32, VNIC_FNIC_PLOGI_TIMEOUT_MAX, + max_t(u32, VNIC_FNIC_PLOGI_TIMEOUT_MIN, + c->plogi_timeout)); + + c->io_throttle_count = + min_t(u32, VNIC_FNIC_IO_THROTTLE_COUNT_MAX, + max_t(u32, VNIC_FNIC_IO_THROTTLE_COUNT_MIN, + c->io_throttle_count)); + + c->link_down_timeout = + min_t(u32, VNIC_FNIC_LINK_DOWN_TIMEOUT_MAX, + c->link_down_timeout); + + c->port_down_timeout = + min_t(u32, VNIC_FNIC_PORT_DOWN_TIMEOUT_MAX, + c->port_down_timeout); + + c->port_down_io_retries = + min_t(u32, VNIC_FNIC_PORT_DOWN_IO_RETRIES_MAX, + c->port_down_io_retries); + + c->luns_per_tgt = + min_t(u32, VNIC_FNIC_LUNS_PER_TARGET_MAX, + max_t(u32, VNIC_FNIC_LUNS_PER_TARGET_MIN, + c->luns_per_tgt)); + + c->intr_timer = min_t(u16, VNIC_INTR_TIMER_MAX, c->intr_timer); + c->intr_timer_type = c->intr_timer_type; + + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC MAC addr %02x:%02x:%02x:%02x:%02x:%02x " + "wq/wq_copy/rq %d/%d/%d\n", + fnic->mac_addr[0], fnic->mac_addr[1], fnic->mac_addr[2], + fnic->mac_addr[3], fnic->mac_addr[4], fnic->mac_addr[5], + c->wq_enet_desc_count, c->wq_copy_desc_count, + c->rq_desc_count); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC node wwn %llx port wwn %llx\n", + c->node_wwn, c->port_wwn); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC ed_tov %d ra_tov %d\n", + c->ed_tov, c->ra_tov); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC mtu %d intr timer %d\n", + c->maxdatafieldsize, c->intr_timer); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC flags 0x%x luns per tgt %d\n", + c->flags, c->luns_per_tgt); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC flogi_retries %d flogi timeout %d\n", + c->flogi_retries, c->flogi_timeout); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC plogi retries %d plogi timeout %d\n", + c->plogi_retries, c->plogi_timeout); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC io throttle count %d link dn timeout %d\n", + c->io_throttle_count, c->link_down_timeout); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC port dn io retries %d port dn timeout %d\n", + c->port_down_io_retries, c->port_down_timeout); + + return 0; +} + +int fnic_set_nic_config(struct fnic *fnic, u8 rss_default_cpu, + u8 rss_hash_type, + u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable, + u8 tso_ipid_split_en, u8 ig_vlan_strip_en) +{ + u64 a0, a1; + u32 nic_cfg; + int wait = 1000; + + vnic_set_nic_cfg(&nic_cfg, rss_default_cpu, + rss_hash_type, rss_hash_bits, rss_base_cpu, + rss_enable, tso_ipid_split_en, ig_vlan_strip_en); + + a0 = nic_cfg; + a1 = 0; + + return vnic_dev_cmd(fnic->vdev, CMD_NIC_CFG, &a0, &a1, wait); +} + +void fnic_get_res_counts(struct fnic *fnic) +{ + fnic->wq_count = vnic_dev_get_res_count(fnic->vdev, RES_TYPE_WQ); + fnic->raw_wq_count = fnic->wq_count - 1; + fnic->wq_copy_count = fnic->wq_count - fnic->raw_wq_count; + fnic->rq_count = vnic_dev_get_res_count(fnic->vdev, RES_TYPE_RQ); + fnic->cq_count = vnic_dev_get_res_count(fnic->vdev, RES_TYPE_CQ); + fnic->intr_count = vnic_dev_get_res_count(fnic->vdev, + RES_TYPE_INTR_CTRL); +} + +void fnic_free_vnic_resources(struct fnic *fnic) +{ + unsigned int i; + + for (i = 0; i < fnic->raw_wq_count; i++) + vnic_wq_free(&fnic->wq[i]); + + for (i = 0; i < fnic->wq_copy_count; i++) + vnic_wq_copy_free(&fnic->wq_copy[i]); + + for (i = 0; i < fnic->rq_count; i++) + vnic_rq_free(&fnic->rq[i]); + + for (i = 0; i < fnic->cq_count; i++) + vnic_cq_free(&fnic->cq[i]); + + for (i = 0; i < fnic->intr_count; i++) + vnic_intr_free(&fnic->intr[i]); +} + +int fnic_alloc_vnic_resources(struct fnic *fnic) +{ + enum vnic_dev_intr_mode intr_mode; + unsigned int mask_on_assertion; + unsigned int interrupt_offset; + unsigned int error_interrupt_enable; + unsigned int error_interrupt_offset; + unsigned int i, cq_index; + unsigned int wq_copy_cq_desc_count; + int err; + + intr_mode = vnic_dev_get_intr_mode(fnic->vdev); + + shost_printk(KERN_INFO, fnic->lport->host, "vNIC interrupt mode: %s\n", + intr_mode == VNIC_DEV_INTR_MODE_INTX ? "legacy PCI INTx" : + intr_mode == VNIC_DEV_INTR_MODE_MSI ? "MSI" : + intr_mode == VNIC_DEV_INTR_MODE_MSIX ? + "MSI-X" : "unknown"); + + shost_printk(KERN_INFO, fnic->lport->host, "vNIC resources avail: " + "wq %d cp_wq %d raw_wq %d rq %d cq %d intr %d\n", + fnic->wq_count, fnic->wq_copy_count, fnic->raw_wq_count, + fnic->rq_count, fnic->cq_count, fnic->intr_count); + + /* Allocate Raw WQ used for FCS frames */ + for (i = 0; i < fnic->raw_wq_count; i++) { + err = vnic_wq_alloc(fnic->vdev, &fnic->wq[i], i, + fnic->config.wq_enet_desc_count, + sizeof(struct wq_enet_desc)); + if (err) + goto err_out_cleanup; + } + + /* Allocate Copy WQs used for SCSI IOs */ + for (i = 0; i < fnic->wq_copy_count; i++) { + err = vnic_wq_copy_alloc(fnic->vdev, &fnic->wq_copy[i], + (fnic->raw_wq_count + i), + fnic->config.wq_copy_desc_count, + sizeof(struct fcpio_host_req)); + if (err) + goto err_out_cleanup; + } + + /* RQ for receiving FCS frames */ + for (i = 0; i < fnic->rq_count; i++) { + err = vnic_rq_alloc(fnic->vdev, &fnic->rq[i], i, + fnic->config.rq_desc_count, + sizeof(struct rq_enet_desc)); + if (err) + goto err_out_cleanup; + } + + /* CQ for each RQ */ + for (i = 0; i < fnic->rq_count; i++) { + cq_index = i; + err = vnic_cq_alloc(fnic->vdev, + &fnic->cq[cq_index], cq_index, + fnic->config.rq_desc_count, + sizeof(struct cq_enet_rq_desc)); + if (err) + goto err_out_cleanup; + } + + /* CQ for each WQ */ + for (i = 0; i < fnic->raw_wq_count; i++) { + cq_index = fnic->rq_count + i; + err = vnic_cq_alloc(fnic->vdev, &fnic->cq[cq_index], cq_index, + fnic->config.wq_enet_desc_count, + sizeof(struct cq_enet_wq_desc)); + if (err) + goto err_out_cleanup; + } + + /* CQ for each COPY WQ */ + wq_copy_cq_desc_count = (fnic->config.wq_copy_desc_count * 3); + for (i = 0; i < fnic->wq_copy_count; i++) { + cq_index = fnic->raw_wq_count + fnic->rq_count + i; + err = vnic_cq_alloc(fnic->vdev, &fnic->cq[cq_index], + cq_index, + wq_copy_cq_desc_count, + sizeof(struct fcpio_fw_req)); + if (err) + goto err_out_cleanup; + } + + for (i = 0; i < fnic->intr_count; i++) { + err = vnic_intr_alloc(fnic->vdev, &fnic->intr[i], i); + if (err) + goto err_out_cleanup; + } + + fnic->legacy_pba = vnic_dev_get_res(fnic->vdev, + RES_TYPE_INTR_PBA_LEGACY, 0); + + if (!fnic->legacy_pba && intr_mode == VNIC_DEV_INTR_MODE_INTX) { + shost_printk(KERN_ERR, fnic->lport->host, + "Failed to hook legacy pba resource\n"); + err = -ENODEV; + goto err_out_cleanup; + } + + /* + * Init RQ/WQ resources. + * + * RQ[0 to n-1] point to CQ[0 to n-1] + * WQ[0 to m-1] point to CQ[n to n+m-1] + * WQ_COPY[0 to k-1] points to CQ[n+m to n+m+k-1] + * + * Note for copy wq we always initialize with cq_index = 0 + * + * Error interrupt is not enabled for MSI. + */ + + switch (intr_mode) { + case VNIC_DEV_INTR_MODE_INTX: + case VNIC_DEV_INTR_MODE_MSIX: + error_interrupt_enable = 1; + error_interrupt_offset = fnic->err_intr_offset; + break; + default: + error_interrupt_enable = 0; + error_interrupt_offset = 0; + break; + } + + for (i = 0; i < fnic->rq_count; i++) { + cq_index = i; + vnic_rq_init(&fnic->rq[i], + cq_index, + error_interrupt_enable, + error_interrupt_offset); + } + + for (i = 0; i < fnic->raw_wq_count; i++) { + cq_index = i + fnic->rq_count; + vnic_wq_init(&fnic->wq[i], + cq_index, + error_interrupt_enable, + error_interrupt_offset); + } + + for (i = 0; i < fnic->wq_copy_count; i++) { + vnic_wq_copy_init(&fnic->wq_copy[i], + 0 /* cq_index 0 - always */, + error_interrupt_enable, + error_interrupt_offset); + } + + for (i = 0; i < fnic->cq_count; i++) { + + switch (intr_mode) { + case VNIC_DEV_INTR_MODE_MSIX: + interrupt_offset = i; + break; + default: + interrupt_offset = 0; + break; + } + + vnic_cq_init(&fnic->cq[i], + 0 /* flow_control_enable */, + 1 /* color_enable */, + 0 /* cq_head */, + 0 /* cq_tail */, + 1 /* cq_tail_color */, + 1 /* interrupt_enable */, + 1 /* cq_entry_enable */, + 0 /* cq_message_enable */, + interrupt_offset, + 0 /* cq_message_addr */); + } + + /* + * Init INTR resources + * + * mask_on_assertion is not used for INTx due to the level- + * triggered nature of INTx + */ + + switch (intr_mode) { + case VNIC_DEV_INTR_MODE_MSI: + case VNIC_DEV_INTR_MODE_MSIX: + mask_on_assertion = 1; + break; + default: + mask_on_assertion = 0; + break; + } + + for (i = 0; i < fnic->intr_count; i++) { + vnic_intr_init(&fnic->intr[i], + fnic->config.intr_timer, + fnic->config.intr_timer_type, + mask_on_assertion); + } + + /* init the stats memory by making the first call here */ + err = vnic_dev_stats_dump(fnic->vdev, &fnic->stats); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "vnic_dev_stats_dump failed - x%x\n", err); + goto err_out_cleanup; + } + + /* Clear LIF stats */ + vnic_dev_stats_clear(fnic->vdev); + + return 0; + +err_out_cleanup: + fnic_free_vnic_resources(fnic); + + return err; +} diff --git a/drivers/scsi/fnic/fnic_res.h b/drivers/scsi/fnic/fnic_res.h new file mode 100644 index 000000000000..b6f310262534 --- /dev/null +++ b/drivers/scsi/fnic/fnic_res.h @@ -0,0 +1,197 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _FNIC_RES_H_ +#define _FNIC_RES_H_ + +#include "wq_enet_desc.h" +#include "rq_enet_desc.h" +#include "vnic_wq.h" +#include "vnic_rq.h" +#include "fnic_io.h" +#include "fcpio.h" +#include "vnic_wq_copy.h" +#include "vnic_cq_copy.h" + +static inline void fnic_queue_wq_desc(struct vnic_wq *wq, + void *os_buf, dma_addr_t dma_addr, + unsigned int len, unsigned int fc_eof, + int vlan_tag_insert, + unsigned int vlan_tag, + int cq_entry, int sop, int eop) +{ + struct wq_enet_desc *desc = vnic_wq_next_desc(wq); + + wq_enet_desc_enc(desc, + (u64)dma_addr | VNIC_PADDR_TARGET, + (u16)len, + 0, /* mss_or_csum_offset */ + (u16)fc_eof, + 0, /* offload_mode */ + (u8)eop, (u8)cq_entry, + 1, /* fcoe_encap */ + (u8)vlan_tag_insert, + (u16)vlan_tag, + 0 /* loopback */); + + vnic_wq_post(wq, os_buf, dma_addr, len, sop, eop); +} + +static inline void fnic_queue_wq_copy_desc_icmnd_16(struct vnic_wq_copy *wq, + u32 req_id, + u32 lunmap_id, u8 spl_flags, + u32 sgl_cnt, u32 sense_len, + u64 sgl_addr, u64 sns_addr, + u8 crn, u8 pri_ta, + u8 flags, u8 *scsi_cdb, + u32 data_len, u8 *lun, + u32 d_id, u16 mss, + u32 ratov, u32 edtov) +{ + struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq); + + desc->hdr.type = FCPIO_ICMND_16; /* enum fcpio_type */ + desc->hdr.status = 0; /* header status entry */ + desc->hdr._resvd = 0; /* reserved */ + desc->hdr.tag.u.req_id = req_id; /* id for this request */ + + desc->u.icmnd_16.lunmap_id = lunmap_id; /* index into lunmap table */ + desc->u.icmnd_16.special_req_flags = spl_flags; /* exch req flags */ + desc->u.icmnd_16._resvd0[0] = 0; /* reserved */ + desc->u.icmnd_16._resvd0[1] = 0; /* reserved */ + desc->u.icmnd_16._resvd0[2] = 0; /* reserved */ + desc->u.icmnd_16.sgl_cnt = sgl_cnt; /* scatter-gather list count */ + desc->u.icmnd_16.sense_len = sense_len; /* sense buffer length */ + desc->u.icmnd_16.sgl_addr = sgl_addr; /* scatter-gather list addr */ + desc->u.icmnd_16.sense_addr = sns_addr; /* sense buffer address */ + desc->u.icmnd_16.crn = crn; /* SCSI Command Reference No.*/ + desc->u.icmnd_16.pri_ta = pri_ta; /* SCSI Pri & Task attribute */ + desc->u.icmnd_16._resvd1 = 0; /* reserved: should be 0 */ + desc->u.icmnd_16.flags = flags; /* command flags */ + memcpy(desc->u.icmnd_16.scsi_cdb, scsi_cdb, CDB_16); /* SCSI CDB */ + desc->u.icmnd_16.data_len = data_len; /* length of data expected */ + memcpy(desc->u.icmnd_16.lun, lun, LUN_ADDRESS); /* LUN address */ + desc->u.icmnd_16._resvd2 = 0; /* reserved */ + hton24(desc->u.icmnd_16.d_id, d_id); /* FC vNIC only: Target D_ID */ + desc->u.icmnd_16.mss = mss; /* FC vNIC only: max burst */ + desc->u.icmnd_16.r_a_tov = ratov; /*FC vNIC only: Res. Alloc Timeout */ + desc->u.icmnd_16.e_d_tov = edtov; /*FC vNIC only: Err Detect Timeout */ + + vnic_wq_copy_post(wq); +} + +static inline void fnic_queue_wq_copy_desc_itmf(struct vnic_wq_copy *wq, + u32 req_id, u32 lunmap_id, + u32 tm_req, u32 tm_id, u8 *lun, + u32 d_id, u32 r_a_tov, + u32 e_d_tov) +{ + struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq); + + desc->hdr.type = FCPIO_ITMF; /* enum fcpio_type */ + desc->hdr.status = 0; /* header status entry */ + desc->hdr._resvd = 0; /* reserved */ + desc->hdr.tag.u.req_id = req_id; /* id for this request */ + + desc->u.itmf.lunmap_id = lunmap_id; /* index into lunmap table */ + desc->u.itmf.tm_req = tm_req; /* SCSI Task Management request */ + desc->u.itmf.t_tag = tm_id; /* tag of fcpio to be aborted */ + desc->u.itmf._resvd = 0; + memcpy(desc->u.itmf.lun, lun, LUN_ADDRESS); /* LUN address */ + desc->u.itmf._resvd1 = 0; + hton24(desc->u.itmf.d_id, d_id); /* FC vNIC only: Target D_ID */ + desc->u.itmf.r_a_tov = r_a_tov; /* FC vNIC only: R_A_TOV in msec */ + desc->u.itmf.e_d_tov = e_d_tov; /* FC vNIC only: E_D_TOV in msec */ + + vnic_wq_copy_post(wq); +} + +static inline void fnic_queue_wq_copy_desc_flogi_reg(struct vnic_wq_copy *wq, + u32 req_id, u8 format, + u32 s_id, u8 *gw_mac) +{ + struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq); + + desc->hdr.type = FCPIO_FLOGI_REG; /* enum fcpio_type */ + desc->hdr.status = 0; /* header status entry */ + desc->hdr._resvd = 0; /* reserved */ + desc->hdr.tag.u.req_id = req_id; /* id for this request */ + + desc->u.flogi_reg.format = format; + hton24(desc->u.flogi_reg.s_id, s_id); + memcpy(desc->u.flogi_reg.gateway_mac, gw_mac, ETH_ALEN); + + vnic_wq_copy_post(wq); +} + +static inline void fnic_queue_wq_copy_desc_fw_reset(struct vnic_wq_copy *wq, + u32 req_id) +{ + struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq); + + desc->hdr.type = FCPIO_RESET; /* enum fcpio_type */ + desc->hdr.status = 0; /* header status entry */ + desc->hdr._resvd = 0; /* reserved */ + desc->hdr.tag.u.req_id = req_id; /* id for this request */ + + vnic_wq_copy_post(wq); +} + +static inline void fnic_queue_wq_copy_desc_lunmap(struct vnic_wq_copy *wq, + u32 req_id, u64 lunmap_addr, + u32 lunmap_len) +{ + struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq); + + desc->hdr.type = FCPIO_LUNMAP_REQ; /* enum fcpio_type */ + desc->hdr.status = 0; /* header status entry */ + desc->hdr._resvd = 0; /* reserved */ + desc->hdr.tag.u.req_id = req_id; /* id for this request */ + + desc->u.lunmap_req.addr = lunmap_addr; /* address of the buffer */ + desc->u.lunmap_req.len = lunmap_len; /* len of the buffer */ + + vnic_wq_copy_post(wq); +} + +static inline void fnic_queue_rq_desc(struct vnic_rq *rq, + void *os_buf, dma_addr_t dma_addr, + u16 len) +{ + struct rq_enet_desc *desc = vnic_rq_next_desc(rq); + + rq_enet_desc_enc(desc, + (u64)dma_addr | VNIC_PADDR_TARGET, + RQ_ENET_TYPE_ONLY_SOP, + (u16)len); + + vnic_rq_post(rq, os_buf, 0, dma_addr, len); +} + + +struct fnic; + +int fnic_get_vnic_config(struct fnic *); +int fnic_alloc_vnic_resources(struct fnic *); +void fnic_free_vnic_resources(struct fnic *); +void fnic_get_res_counts(struct fnic *); +int fnic_set_nic_config(struct fnic *fnic, u8 rss_default_cpu, + u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, + u8 rss_enable, u8 tso_ipid_split_en, + u8 ig_vlan_strip_en); + +#endif /* _FNIC_RES_H_ */ diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c new file mode 100644 index 000000000000..eabf36502856 --- /dev/null +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -0,0 +1,1850 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fnic_io.h" +#include "fnic.h" + +const char *fnic_state_str[] = { + [FNIC_IN_FC_MODE] = "FNIC_IN_FC_MODE", + [FNIC_IN_FC_TRANS_ETH_MODE] = "FNIC_IN_FC_TRANS_ETH_MODE", + [FNIC_IN_ETH_MODE] = "FNIC_IN_ETH_MODE", + [FNIC_IN_ETH_TRANS_FC_MODE] = "FNIC_IN_ETH_TRANS_FC_MODE", +}; + +static const char *fnic_ioreq_state_str[] = { + [FNIC_IOREQ_CMD_PENDING] = "FNIC_IOREQ_CMD_PENDING", + [FNIC_IOREQ_ABTS_PENDING] = "FNIC_IOREQ_ABTS_PENDING", + [FNIC_IOREQ_ABTS_COMPLETE] = "FNIC_IOREQ_ABTS_COMPLETE", + [FNIC_IOREQ_CMD_COMPLETE] = "FNIC_IOREQ_CMD_COMPLETE", +}; + +static const char *fcpio_status_str[] = { + [FCPIO_SUCCESS] = "FCPIO_SUCCESS", /*0x0*/ + [FCPIO_INVALID_HEADER] = "FCPIO_INVALID_HEADER", + [FCPIO_OUT_OF_RESOURCE] = "FCPIO_OUT_OF_RESOURCE", + [FCPIO_INVALID_PARAM] = "FCPIO_INVALID_PARAM]", + [FCPIO_REQ_NOT_SUPPORTED] = "FCPIO_REQ_NOT_SUPPORTED", + [FCPIO_IO_NOT_FOUND] = "FCPIO_IO_NOT_FOUND", + [FCPIO_ABORTED] = "FCPIO_ABORTED", /*0x41*/ + [FCPIO_TIMEOUT] = "FCPIO_TIMEOUT", + [FCPIO_SGL_INVALID] = "FCPIO_SGL_INVALID", + [FCPIO_MSS_INVALID] = "FCPIO_MSS_INVALID", + [FCPIO_DATA_CNT_MISMATCH] = "FCPIO_DATA_CNT_MISMATCH", + [FCPIO_FW_ERR] = "FCPIO_FW_ERR", + [FCPIO_ITMF_REJECTED] = "FCPIO_ITMF_REJECTED", + [FCPIO_ITMF_FAILED] = "FCPIO_ITMF_FAILED", + [FCPIO_ITMF_INCORRECT_LUN] = "FCPIO_ITMF_INCORRECT_LUN", + [FCPIO_CMND_REJECTED] = "FCPIO_CMND_REJECTED", + [FCPIO_NO_PATH_AVAIL] = "FCPIO_NO_PATH_AVAIL", + [FCPIO_PATH_FAILED] = "FCPIO_PATH_FAILED", + [FCPIO_LUNMAP_CHNG_PEND] = "FCPIO_LUNHMAP_CHNG_PEND", +}; + +const char *fnic_state_to_str(unsigned int state) +{ + if (state >= ARRAY_SIZE(fnic_state_str) || !fnic_state_str[state]) + return "unknown"; + + return fnic_state_str[state]; +} + +static const char *fnic_ioreq_state_to_str(unsigned int state) +{ + if (state >= ARRAY_SIZE(fnic_ioreq_state_str) || + !fnic_ioreq_state_str[state]) + return "unknown"; + + return fnic_ioreq_state_str[state]; +} + +static const char *fnic_fcpio_status_to_str(unsigned int status) +{ + if (status >= ARRAY_SIZE(fcpio_status_str) || !fcpio_status_str[status]) + return "unknown"; + + return fcpio_status_str[status]; +} + +static void fnic_cleanup_io(struct fnic *fnic, int exclude_id); + +static inline spinlock_t *fnic_io_lock_hash(struct fnic *fnic, + struct scsi_cmnd *sc) +{ + u32 hash = sc->request->tag & (FNIC_IO_LOCKS - 1); + + return &fnic->io_req_lock[hash]; +} + +/* + * Unmap the data buffer and sense buffer for an io_req, + * also unmap and free the device-private scatter/gather list. + */ +static void fnic_release_ioreq_buf(struct fnic *fnic, + struct fnic_io_req *io_req, + struct scsi_cmnd *sc) +{ + if (io_req->sgl_list_pa) + pci_unmap_single(fnic->pdev, io_req->sgl_list_pa, + sizeof(io_req->sgl_list[0]) * io_req->sgl_cnt, + PCI_DMA_TODEVICE); + scsi_dma_unmap(sc); + + if (io_req->sgl_cnt) + mempool_free(io_req->sgl_list_alloc, + fnic->io_sgl_pool[io_req->sgl_type]); + if (io_req->sense_buf_pa) + pci_unmap_single(fnic->pdev, io_req->sense_buf_pa, + SCSI_SENSE_BUFFERSIZE, PCI_DMA_FROMDEVICE); +} + +/* Free up Copy Wq descriptors. Called with copy_wq lock held */ +static int free_wq_copy_descs(struct fnic *fnic, struct vnic_wq_copy *wq) +{ + /* if no Ack received from firmware, then nothing to clean */ + if (!fnic->fw_ack_recd[0]) + return 1; + + /* + * Update desc_available count based on number of freed descriptors + * Account for wraparound + */ + if (wq->to_clean_index <= fnic->fw_ack_index[0]) + wq->ring.desc_avail += (fnic->fw_ack_index[0] + - wq->to_clean_index + 1); + else + wq->ring.desc_avail += (wq->ring.desc_count + - wq->to_clean_index + + fnic->fw_ack_index[0] + 1); + + /* + * just bump clean index to ack_index+1 accounting for wraparound + * this will essentially free up all descriptors between + * to_clean_index and fw_ack_index, both inclusive + */ + wq->to_clean_index = + (fnic->fw_ack_index[0] + 1) % wq->ring.desc_count; + + /* we have processed the acks received so far */ + fnic->fw_ack_recd[0] = 0; + return 0; +} + + +/* + * fnic_fw_reset_handler + * Routine to send reset msg to fw + */ +int fnic_fw_reset_handler(struct fnic *fnic) +{ + struct vnic_wq_copy *wq = &fnic->wq_copy[0]; + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); + + if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) + free_wq_copy_descs(fnic, wq); + + if (!vnic_wq_copy_desc_avail(wq)) + ret = -EAGAIN; + else + fnic_queue_wq_copy_desc_fw_reset(wq, SCSI_NO_TAG); + + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + + if (!ret) + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Issued fw reset\n"); + else + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Failed to issue fw reset\n"); + return ret; +} + + +/* + * fnic_flogi_reg_handler + * Routine to send flogi register msg to fw + */ +int fnic_flogi_reg_handler(struct fnic *fnic) +{ + struct vnic_wq_copy *wq = &fnic->wq_copy[0]; + u8 gw_mac[ETH_ALEN]; + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); + + if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) + free_wq_copy_descs(fnic, wq); + + if (!vnic_wq_copy_desc_avail(wq)) { + ret = -EAGAIN; + goto flogi_reg_ioreq_end; + } + + if (fnic->fcoui_mode) + memset(gw_mac, 0xff, ETH_ALEN); + else + memcpy(gw_mac, fnic->dest_addr, ETH_ALEN); + + fnic_queue_wq_copy_desc_flogi_reg(wq, SCSI_NO_TAG, + FCPIO_FLOGI_REG_GW_DEST, + fnic->s_id, + gw_mac); + +flogi_reg_ioreq_end: + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + + if (!ret) + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "flog reg issued\n"); + + return ret; +} + +/* + * fnic_queue_wq_copy_desc + * Routine to enqueue a wq copy desc + */ +static inline int fnic_queue_wq_copy_desc(struct fnic *fnic, + struct vnic_wq_copy *wq, + struct fnic_io_req *io_req, + struct scsi_cmnd *sc, + u32 sg_count) +{ + struct scatterlist *sg; + struct fc_rport *rport = starget_to_rport(scsi_target(sc->device)); + struct fc_rport_libfc_priv *rp = rport->dd_data; + struct host_sg_desc *desc; + u8 pri_tag = 0; + unsigned int i; + unsigned long intr_flags; + int flags; + u8 exch_flags; + struct scsi_lun fc_lun; + char msg[2]; + + if (sg_count) { + BUG_ON(sg_count < 0); + BUG_ON(sg_count > FNIC_MAX_SG_DESC_CNT); + + /* For each SGE, create a device desc entry */ + desc = io_req->sgl_list; + for_each_sg(scsi_sglist(sc), sg, sg_count, i) { + desc->addr = cpu_to_le64(sg_dma_address(sg)); + desc->len = cpu_to_le32(sg_dma_len(sg)); + desc->_resvd = 0; + desc++; + } + + io_req->sgl_list_pa = pci_map_single + (fnic->pdev, + io_req->sgl_list, + sizeof(io_req->sgl_list[0]) * sg_count, + PCI_DMA_TODEVICE); + } + + io_req->sense_buf_pa = pci_map_single(fnic->pdev, + sc->sense_buffer, + SCSI_SENSE_BUFFERSIZE, + PCI_DMA_FROMDEVICE); + + int_to_scsilun(sc->device->lun, &fc_lun); + + pri_tag = FCPIO_ICMND_PTA_SIMPLE; + msg[0] = MSG_SIMPLE_TAG; + scsi_populate_tag_msg(sc, msg); + if (msg[0] == MSG_ORDERED_TAG) + pri_tag = FCPIO_ICMND_PTA_ORDERED; + + /* Enqueue the descriptor in the Copy WQ */ + spin_lock_irqsave(&fnic->wq_copy_lock[0], intr_flags); + + if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) + free_wq_copy_descs(fnic, wq); + + if (unlikely(!vnic_wq_copy_desc_avail(wq))) { + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags); + return SCSI_MLQUEUE_HOST_BUSY; + } + + flags = 0; + if (sc->sc_data_direction == DMA_FROM_DEVICE) + flags = FCPIO_ICMND_RDDATA; + else if (sc->sc_data_direction == DMA_TO_DEVICE) + flags = FCPIO_ICMND_WRDATA; + + exch_flags = 0; + if ((fnic->config.flags & VFCF_FCP_SEQ_LVL_ERR) && + (rp->flags & FC_RP_FLAGS_RETRY)) + exch_flags |= FCPIO_ICMND_SRFLAG_RETRY; + + fnic_queue_wq_copy_desc_icmnd_16(wq, sc->request->tag, + 0, exch_flags, io_req->sgl_cnt, + SCSI_SENSE_BUFFERSIZE, + io_req->sgl_list_pa, + io_req->sense_buf_pa, + 0, /* scsi cmd ref, always 0 */ + pri_tag, /* scsi pri and tag */ + flags, /* command flags */ + sc->cmnd, scsi_bufflen(sc), + fc_lun.scsi_lun, io_req->port_id, + rport->maxframe_size, rp->r_a_tov, + rp->e_d_tov); + + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags); + return 0; +} + +/* + * fnic_queuecommand + * Routine to send a scsi cdb + * Called with host_lock held and interrupts disabled. + */ +int fnic_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) +{ + struct fc_lport *lp; + struct fc_rport *rport; + struct fnic_io_req *io_req; + struct fnic *fnic; + struct vnic_wq_copy *wq; + int ret; + u32 sg_count; + unsigned long flags; + unsigned long ptr; + + rport = starget_to_rport(scsi_target(sc->device)); + ret = fc_remote_port_chkready(rport); + if (ret) { + sc->result = ret; + done(sc); + return 0; + } + + lp = shost_priv(sc->device->host); + if (lp->state != LPORT_ST_READY || !(lp->link_up)) + return SCSI_MLQUEUE_HOST_BUSY; + + /* + * Release host lock, use driver resource specific locks from here. + * Don't re-enable interrupts in case they were disabled prior to the + * caller disabling them. + */ + spin_unlock(lp->host->host_lock); + + /* Get a new io_req for this SCSI IO */ + fnic = lport_priv(lp); + + io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC); + if (!io_req) { + ret = SCSI_MLQUEUE_HOST_BUSY; + goto out; + } + memset(io_req, 0, sizeof(*io_req)); + + /* Map the data buffer */ + sg_count = scsi_dma_map(sc); + if (sg_count < 0) { + mempool_free(io_req, fnic->io_req_pool); + goto out; + } + + /* Determine the type of scatter/gather list we need */ + io_req->sgl_cnt = sg_count; + io_req->sgl_type = FNIC_SGL_CACHE_DFLT; + if (sg_count > FNIC_DFLT_SG_DESC_CNT) + io_req->sgl_type = FNIC_SGL_CACHE_MAX; + + if (sg_count) { + io_req->sgl_list = + mempool_alloc(fnic->io_sgl_pool[io_req->sgl_type], + GFP_ATOMIC | GFP_DMA); + if (!io_req->sgl_list) { + ret = SCSI_MLQUEUE_HOST_BUSY; + scsi_dma_unmap(sc); + mempool_free(io_req, fnic->io_req_pool); + goto out; + } + + /* Cache sgl list allocated address before alignment */ + io_req->sgl_list_alloc = io_req->sgl_list; + ptr = (unsigned long) io_req->sgl_list; + if (ptr % FNIC_SG_DESC_ALIGN) { + io_req->sgl_list = (struct host_sg_desc *) + (((unsigned long) ptr + + FNIC_SG_DESC_ALIGN - 1) + & ~(FNIC_SG_DESC_ALIGN - 1)); + } + } + + /* initialize rest of io_req */ + io_req->port_id = rport->port_id; + CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING; + CMD_SP(sc) = (char *)io_req; + sc->scsi_done = done; + + /* create copy wq desc and enqueue it */ + wq = &fnic->wq_copy[0]; + ret = fnic_queue_wq_copy_desc(fnic, wq, io_req, sc, sg_count); + if (ret) { + /* + * In case another thread cancelled the request, + * refetch the pointer under the lock. + */ + spinlock_t *io_lock = fnic_io_lock_hash(fnic, sc); + + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + CMD_SP(sc) = NULL; + CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE; + spin_unlock_irqrestore(io_lock, flags); + if (io_req) { + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + } + } +out: + /* acquire host lock before returning to SCSI */ + spin_lock(lp->host->host_lock); + return ret; +} + +/* + * fnic_fcpio_fw_reset_cmpl_handler + * Routine to handle fw reset completion + */ +static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic, + struct fcpio_fw_req *desc) +{ + u8 type; + u8 hdr_status; + struct fcpio_tag tag; + int ret = 0; + struct fc_frame *flogi; + unsigned long flags; + + fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); + + /* Clean up all outstanding io requests */ + fnic_cleanup_io(fnic, SCSI_NO_TAG); + + spin_lock_irqsave(&fnic->fnic_lock, flags); + + flogi = fnic->flogi; + fnic->flogi = NULL; + + /* fnic should be in FC_TRANS_ETH_MODE */ + if (fnic->state == FNIC_IN_FC_TRANS_ETH_MODE) { + /* Check status of reset completion */ + if (!hdr_status) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "reset cmpl success\n"); + /* Ready to send flogi out */ + fnic->state = FNIC_IN_ETH_MODE; + } else { + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "fnic fw_reset : failed %s\n", + fnic_fcpio_status_to_str(hdr_status)); + + /* + * Unable to change to eth mode, cannot send out flogi + * Change state to fc mode, so that subsequent Flogi + * requests from libFC will cause more attempts to + * reset the firmware. Free the cached flogi + */ + fnic->state = FNIC_IN_FC_MODE; + ret = -1; + } + } else { + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "Unexpected state %s while processing" + " reset cmpl\n", fnic_state_to_str(fnic->state)); + ret = -1; + } + + /* Thread removing device blocks till firmware reset is complete */ + if (fnic->remove_wait) + complete(fnic->remove_wait); + + /* + * If fnic is being removed, or fw reset failed + * free the flogi frame. Else, send it out + */ + if (fnic->remove_wait || ret) { + fnic->flogi_oxid = FC_XID_UNKNOWN; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + if (flogi) + dev_kfree_skb_irq(fp_skb(flogi)); + goto reset_cmpl_handler_end; + } + + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (flogi) + ret = fnic_send_frame(fnic, flogi); + + reset_cmpl_handler_end: + return ret; +} + +/* + * fnic_fcpio_flogi_reg_cmpl_handler + * Routine to handle flogi register completion + */ +static int fnic_fcpio_flogi_reg_cmpl_handler(struct fnic *fnic, + struct fcpio_fw_req *desc) +{ + u8 type; + u8 hdr_status; + struct fcpio_tag tag; + int ret = 0; + struct fc_frame *flogi_resp = NULL; + unsigned long flags; + struct sk_buff *skb; + + fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); + + /* Update fnic state based on status of flogi reg completion */ + spin_lock_irqsave(&fnic->fnic_lock, flags); + + flogi_resp = fnic->flogi_resp; + fnic->flogi_resp = NULL; + + if (fnic->state == FNIC_IN_ETH_TRANS_FC_MODE) { + + /* Check flogi registration completion status */ + if (!hdr_status) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "flog reg succeeded\n"); + fnic->state = FNIC_IN_FC_MODE; + } else { + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "fnic flogi reg :failed %s\n", + fnic_fcpio_status_to_str(hdr_status)); + fnic->state = FNIC_IN_ETH_MODE; + ret = -1; + } + } else { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Unexpected fnic state %s while" + " processing flogi reg completion\n", + fnic_state_to_str(fnic->state)); + ret = -1; + } + + /* Successful flogi reg cmpl, pass frame to LibFC */ + if (!ret && flogi_resp) { + if (fnic->stop_rx_link_events) { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + goto reg_cmpl_handler_end; + } + skb = (struct sk_buff *)flogi_resp; + /* Use fr_flags to indicate whether flogi resp or not */ + fr_flags(flogi_resp) = 1; + fr_dev(flogi_resp) = fnic->lport; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + skb_queue_tail(&fnic->frame_queue, skb); + queue_work(fnic_event_queue, &fnic->frame_work); + + } else { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + if (flogi_resp) + dev_kfree_skb_irq(fp_skb(flogi_resp)); + } + +reg_cmpl_handler_end: + return ret; +} + +static inline int is_ack_index_in_range(struct vnic_wq_copy *wq, + u16 request_out) +{ + if (wq->to_clean_index <= wq->to_use_index) { + /* out of range, stale request_out index */ + if (request_out < wq->to_clean_index || + request_out >= wq->to_use_index) + return 0; + } else { + /* out of range, stale request_out index */ + if (request_out < wq->to_clean_index && + request_out >= wq->to_use_index) + return 0; + } + /* request_out index is in range */ + return 1; +} + + +/* + * Mark that ack received and store the Ack index. If there are multiple + * acks received before Tx thread cleans it up, the latest value will be + * used which is correct behavior. This state should be in the copy Wq + * instead of in the fnic + */ +static inline void fnic_fcpio_ack_handler(struct fnic *fnic, + unsigned int cq_index, + struct fcpio_fw_req *desc) +{ + struct vnic_wq_copy *wq; + u16 request_out = desc->u.ack.request_out; + unsigned long flags; + + /* mark the ack state */ + wq = &fnic->wq_copy[cq_index - fnic->raw_wq_count - fnic->rq_count]; + spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); + + if (is_ack_index_in_range(wq, request_out)) { + fnic->fw_ack_index[0] = request_out; + fnic->fw_ack_recd[0] = 1; + } + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); +} + +/* + * fnic_fcpio_icmnd_cmpl_handler + * Routine to handle icmnd completions + */ +static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, + struct fcpio_fw_req *desc) +{ + u8 type; + u8 hdr_status; + struct fcpio_tag tag; + u32 id; + u64 xfer_len = 0; + struct fcpio_icmnd_cmpl *icmnd_cmpl; + struct fnic_io_req *io_req; + struct scsi_cmnd *sc; + unsigned long flags; + spinlock_t *io_lock; + + /* Decode the cmpl description to get the io_req id */ + fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); + fcpio_tag_id_dec(&tag, &id); + + if (id >= FNIC_MAX_IO_REQ) + return; + + sc = scsi_host_find_tag(fnic->lport->host, id); + WARN_ON_ONCE(!sc); + if (!sc) + return; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + WARN_ON_ONCE(!io_req); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + return; + } + + /* firmware completed the io */ + io_req->io_completed = 1; + + /* + * if SCSI-ML has already issued abort on this command, + * ignore completion of the IO. The abts path will clean it up + */ + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + return; + } + + /* Mark the IO as complete */ + CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE; + + icmnd_cmpl = &desc->u.icmnd_cmpl; + + switch (hdr_status) { + case FCPIO_SUCCESS: + sc->result = (DID_OK << 16) | icmnd_cmpl->scsi_status; + xfer_len = scsi_bufflen(sc); + scsi_set_resid(sc, icmnd_cmpl->residual); + + if (icmnd_cmpl->flags & FCPIO_ICMND_CMPL_RESID_UNDER) + xfer_len -= icmnd_cmpl->residual; + + /* + * If queue_full, then try to reduce queue depth for all + * LUNS on the target. Todo: this should be accompanied + * by a periodic queue_depth rampup based on successful + * IO completion. + */ + if (icmnd_cmpl->scsi_status == QUEUE_FULL) { + struct scsi_device *t_sdev; + int qd = 0; + + shost_for_each_device(t_sdev, sc->device->host) { + if (t_sdev->id != sc->device->id) + continue; + + if (t_sdev->queue_depth > 1) { + qd = scsi_track_queue_full + (t_sdev, + t_sdev->queue_depth - 1); + if (qd == -1) + qd = t_sdev->host->cmd_per_lun; + shost_printk(KERN_INFO, + fnic->lport->host, + "scsi[%d:%d:%d:%d" + "] queue full detected," + "new depth = %d\n", + t_sdev->host->host_no, + t_sdev->channel, + t_sdev->id, t_sdev->lun, + t_sdev->queue_depth); + } + } + } + break; + + case FCPIO_TIMEOUT: /* request was timed out */ + sc->result = (DID_TIME_OUT << 16) | icmnd_cmpl->scsi_status; + break; + + case FCPIO_ABORTED: /* request was aborted */ + sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status; + break; + + case FCPIO_DATA_CNT_MISMATCH: /* recv/sent more/less data than exp. */ + scsi_set_resid(sc, icmnd_cmpl->residual); + sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status; + break; + + case FCPIO_OUT_OF_RESOURCE: /* out of resources to complete request */ + sc->result = (DID_REQUEUE << 16) | icmnd_cmpl->scsi_status; + break; + case FCPIO_INVALID_HEADER: /* header contains invalid data */ + case FCPIO_INVALID_PARAM: /* some parameter in request invalid */ + case FCPIO_REQ_NOT_SUPPORTED:/* request type is not supported */ + case FCPIO_IO_NOT_FOUND: /* requested I/O was not found */ + case FCPIO_SGL_INVALID: /* request was aborted due to sgl error */ + case FCPIO_MSS_INVALID: /* request was aborted due to mss error */ + case FCPIO_FW_ERR: /* request was terminated due fw error */ + default: + shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n", + fnic_fcpio_status_to_str(hdr_status)); + sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status; + break; + } + + /* Break link with the SCSI command */ + CMD_SP(sc) = NULL; + + spin_unlock_irqrestore(io_lock, flags); + + fnic_release_ioreq_buf(fnic, io_req, sc); + + mempool_free(io_req, fnic->io_req_pool); + + if (sc->sc_data_direction == DMA_FROM_DEVICE) { + fnic->lport->host_stats.fcp_input_requests++; + fnic->fcp_input_bytes += xfer_len; + } else if (sc->sc_data_direction == DMA_TO_DEVICE) { + fnic->lport->host_stats.fcp_output_requests++; + fnic->fcp_output_bytes += xfer_len; + } else + fnic->lport->host_stats.fcp_control_requests++; + + /* Call SCSI completion function to complete the IO */ + if (sc->scsi_done) + sc->scsi_done(sc); + +} + +/* fnic_fcpio_itmf_cmpl_handler + * Routine to handle itmf completions + */ +static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, + struct fcpio_fw_req *desc) +{ + u8 type; + u8 hdr_status; + struct fcpio_tag tag; + u32 id; + struct scsi_cmnd *sc; + struct fnic_io_req *io_req; + unsigned long flags; + spinlock_t *io_lock; + + fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); + fcpio_tag_id_dec(&tag, &id); + + if ((id & FNIC_TAG_MASK) >= FNIC_MAX_IO_REQ) + return; + + sc = scsi_host_find_tag(fnic->lport->host, id & FNIC_TAG_MASK); + WARN_ON_ONCE(!sc); + if (!sc) + return; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + WARN_ON_ONCE(!io_req); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + return; + } + + if (id & FNIC_TAG_ABORT) { + /* Completion of abort cmd */ + if (CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING) { + /* This is a late completion. Ignore it */ + spin_unlock_irqrestore(io_lock, flags); + return; + } + CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE; + CMD_ABTS_STATUS(sc) = hdr_status; + + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "abts cmpl recd. id %d status %s\n", + (int)(id & FNIC_TAG_MASK), + fnic_fcpio_status_to_str(hdr_status)); + + /* + * If scsi_eh thread is blocked waiting for abts to complete, + * signal completion to it. IO will be cleaned in the thread + * else clean it in this context + */ + if (io_req->abts_done) { + complete(io_req->abts_done); + spin_unlock_irqrestore(io_lock, flags); + } else { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "abts cmpl, completing IO\n"); + CMD_SP(sc) = NULL; + sc->result = (DID_ERROR << 16); + + spin_unlock_irqrestore(io_lock, flags); + + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + if (sc->scsi_done) + sc->scsi_done(sc); + } + + } else if (id & FNIC_TAG_DEV_RST) { + /* Completion of device reset */ + CMD_LR_STATUS(sc) = hdr_status; + CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE; + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "dev reset cmpl recd. id %d status %s\n", + (int)(id & FNIC_TAG_MASK), + fnic_fcpio_status_to_str(hdr_status)); + if (io_req->dr_done) + complete(io_req->dr_done); + spin_unlock_irqrestore(io_lock, flags); + + } else { + shost_printk(KERN_ERR, fnic->lport->host, + "Unexpected itmf io state %s tag %x\n", + fnic_ioreq_state_to_str(CMD_STATE(sc)), id); + spin_unlock_irqrestore(io_lock, flags); + } + +} + +/* + * fnic_fcpio_cmpl_handler + * Routine to service the cq for wq_copy + */ +static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev, + unsigned int cq_index, + struct fcpio_fw_req *desc) +{ + struct fnic *fnic = vnic_dev_priv(vdev); + int ret = 0; + + switch (desc->hdr.type) { + case FCPIO_ACK: /* fw copied copy wq desc to its queue */ + fnic_fcpio_ack_handler(fnic, cq_index, desc); + break; + + case FCPIO_ICMND_CMPL: /* fw completed a command */ + fnic_fcpio_icmnd_cmpl_handler(fnic, desc); + break; + + case FCPIO_ITMF_CMPL: /* fw completed itmf (abort cmd, lun reset)*/ + fnic_fcpio_itmf_cmpl_handler(fnic, desc); + break; + + case FCPIO_FLOGI_REG_CMPL: /* fw completed flogi_reg */ + ret = fnic_fcpio_flogi_reg_cmpl_handler(fnic, desc); + break; + + case FCPIO_RESET_CMPL: /* fw completed reset */ + ret = fnic_fcpio_fw_reset_cmpl_handler(fnic, desc); + break; + + default: + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "firmware completion type %d\n", + desc->hdr.type); + break; + } + + return ret; +} + +/* + * fnic_wq_copy_cmpl_handler + * Routine to process wq copy + */ +int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do) +{ + unsigned int wq_work_done = 0; + unsigned int i, cq_index; + unsigned int cur_work_done; + + for (i = 0; i < fnic->wq_copy_count; i++) { + cq_index = i + fnic->raw_wq_count + fnic->rq_count; + cur_work_done = vnic_cq_copy_service(&fnic->cq[cq_index], + fnic_fcpio_cmpl_handler, + copy_work_to_do); + wq_work_done += cur_work_done; + } + return wq_work_done; +} + +static void fnic_cleanup_io(struct fnic *fnic, int exclude_id) +{ + unsigned int i; + struct fnic_io_req *io_req; + unsigned long flags = 0; + struct scsi_cmnd *sc; + spinlock_t *io_lock; + + for (i = 0; i < FNIC_MAX_IO_REQ; i++) { + if (i == exclude_id) + continue; + + sc = scsi_host_find_tag(fnic->lport->host, i); + if (!sc) + continue; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + goto cleanup_scsi_cmd; + } + + CMD_SP(sc) = NULL; + + spin_unlock_irqrestore(io_lock, flags); + + /* + * If there is a scsi_cmnd associated with this io_req, then + * free the corresponding state + */ + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + +cleanup_scsi_cmd: + sc->result = DID_TRANSPORT_DISRUPTED << 16; + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "fnic_cleanup_io:" + " DID_TRANSPORT_DISRUPTED\n"); + + /* Complete the command to SCSI */ + if (sc->scsi_done) + sc->scsi_done(sc); + } +} + +void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq, + struct fcpio_host_req *desc) +{ + u32 id; + struct fnic *fnic = vnic_dev_priv(wq->vdev); + struct fnic_io_req *io_req; + struct scsi_cmnd *sc; + unsigned long flags; + spinlock_t *io_lock; + + /* get the tag reference */ + fcpio_tag_id_dec(&desc->hdr.tag, &id); + id &= FNIC_TAG_MASK; + + if (id >= FNIC_MAX_IO_REQ) + return; + + sc = scsi_host_find_tag(fnic->lport->host, id); + if (!sc) + return; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + + /* Get the IO context which this desc refers to */ + io_req = (struct fnic_io_req *)CMD_SP(sc); + + /* fnic interrupts are turned off by now */ + + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + goto wq_copy_cleanup_scsi_cmd; + } + + CMD_SP(sc) = NULL; + + spin_unlock_irqrestore(io_lock, flags); + + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + +wq_copy_cleanup_scsi_cmd: + sc->result = DID_NO_CONNECT << 16; + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "wq_copy_cleanup_handler:" + " DID_NO_CONNECT\n"); + + if (sc->scsi_done) + sc->scsi_done(sc); +} + +static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag, + u32 task_req, u8 *fc_lun, + struct fnic_io_req *io_req) +{ + struct vnic_wq_copy *wq = &fnic->wq_copy[0]; + unsigned long flags; + + spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); + + if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) + free_wq_copy_descs(fnic, wq); + + if (!vnic_wq_copy_desc_avail(wq)) { + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + return 1; + } + fnic_queue_wq_copy_desc_itmf(wq, tag | FNIC_TAG_ABORT, + 0, task_req, tag, fc_lun, io_req->port_id, + fnic->config.ra_tov, fnic->config.ed_tov); + + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + return 0; +} + +void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) +{ + int tag; + struct fnic_io_req *io_req; + spinlock_t *io_lock; + unsigned long flags; + struct scsi_cmnd *sc; + struct scsi_lun fc_lun; + enum fnic_ioreq_state old_ioreq_state; + + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "fnic_rport_reset_exch called portid 0x%06x\n", + port_id); + + if (fnic->in_remove) + return; + + for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + sc = scsi_host_find_tag(fnic->lport->host, tag); + if (!sc) + continue; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + + io_req = (struct fnic_io_req *)CMD_SP(sc); + + if (!io_req || io_req->port_id != port_id) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + + /* + * Found IO that is still pending with firmware and + * belongs to rport that went away + */ + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + old_ioreq_state = CMD_STATE(sc); + CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; + CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + + BUG_ON(io_req->abts_done); + + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_rport_reset_exch: Issuing abts\n"); + + spin_unlock_irqrestore(io_lock, flags); + + /* Now queue the abort command to firmware */ + int_to_scsilun(sc->device->lun, &fc_lun); + + if (fnic_queue_abort_io_req(fnic, tag, + FCPIO_ITMF_ABT_TASK_TERM, + fc_lun.scsi_lun, io_req)) { + /* + * Revert the cmd state back to old state, if + * it hasnt changed in between. This cmd will get + * aborted later by scsi_eh, or cleaned up during + * lun reset + */ + io_lock = fnic_io_lock_hash(fnic, sc); + + spin_lock_irqsave(io_lock, flags); + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) + CMD_STATE(sc) = old_ioreq_state; + spin_unlock_irqrestore(io_lock, flags); + } + } + +} + +void fnic_terminate_rport_io(struct fc_rport *rport) +{ + int tag; + struct fnic_io_req *io_req; + spinlock_t *io_lock; + unsigned long flags; + struct scsi_cmnd *sc; + struct scsi_lun fc_lun; + struct fc_rport_libfc_priv *rdata = rport->dd_data; + struct fc_lport *lport = rdata->local_port; + struct fnic *fnic = lport_priv(lport); + struct fc_rport *cmd_rport; + enum fnic_ioreq_state old_ioreq_state; + + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, "fnic_terminate_rport_io called" + " wwpn 0x%llx, wwnn0x%llx, portid 0x%06x\n", + rport->port_name, rport->node_name, + rport->port_id); + + if (fnic->in_remove) + return; + + for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + sc = scsi_host_find_tag(fnic->lport->host, tag); + if (!sc) + continue; + + cmd_rport = starget_to_rport(scsi_target(sc->device)); + if (rport != cmd_rport) + continue; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + + io_req = (struct fnic_io_req *)CMD_SP(sc); + + if (!io_req || rport != cmd_rport) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + + /* + * Found IO that is still pending with firmware and + * belongs to rport that went away + */ + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + old_ioreq_state = CMD_STATE(sc); + CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; + CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + + BUG_ON(io_req->abts_done); + + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "fnic_terminate_rport_io: Issuing abts\n"); + + spin_unlock_irqrestore(io_lock, flags); + + /* Now queue the abort command to firmware */ + int_to_scsilun(sc->device->lun, &fc_lun); + + if (fnic_queue_abort_io_req(fnic, tag, + FCPIO_ITMF_ABT_TASK_TERM, + fc_lun.scsi_lun, io_req)) { + /* + * Revert the cmd state back to old state, if + * it hasnt changed in between. This cmd will get + * aborted later by scsi_eh, or cleaned up during + * lun reset + */ + io_lock = fnic_io_lock_hash(fnic, sc); + + spin_lock_irqsave(io_lock, flags); + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) + CMD_STATE(sc) = old_ioreq_state; + spin_unlock_irqrestore(io_lock, flags); + } + } + +} + +static void fnic_block_error_handler(struct scsi_cmnd *sc) +{ + struct Scsi_Host *shost = sc->device->host; + struct fc_rport *rport = starget_to_rport(scsi_target(sc->device)); + unsigned long flags; + + spin_lock_irqsave(shost->host_lock, flags); + while (rport->port_state == FC_PORTSTATE_BLOCKED) { + spin_unlock_irqrestore(shost->host_lock, flags); + msleep(1000); + spin_lock_irqsave(shost->host_lock, flags); + } + spin_unlock_irqrestore(shost->host_lock, flags); + +} + +/* + * This function is exported to SCSI for sending abort cmnds. + * A SCSI IO is represented by a io_req in the driver. + * The ioreq is linked to the SCSI Cmd, thus a link with the ULP's IO. + */ +int fnic_abort_cmd(struct scsi_cmnd *sc) +{ + struct fc_lport *lp; + struct fnic *fnic; + struct fnic_io_req *io_req; + struct fc_rport *rport; + spinlock_t *io_lock; + unsigned long flags; + int ret = SUCCESS; + u32 task_req; + struct scsi_lun fc_lun; + DECLARE_COMPLETION_ONSTACK(tm_done); + + /* Wait for rport to unblock */ + fnic_block_error_handler(sc); + + /* Get local-port, check ready and link up */ + lp = shost_priv(sc->device->host); + + fnic = lport_priv(lp); + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "Abort Cmd called FCID 0x%x, LUN 0x%x TAG %d\n", + (starget_to_rport(scsi_target(sc->device)))->port_id, + sc->device->lun, sc->request->tag); + + if (lp->state != LPORT_ST_READY || !(lp->link_up)) { + ret = FAILED; + goto fnic_abort_cmd_end; + } + + /* + * Avoid a race between SCSI issuing the abort and the device + * completing the command. + * + * If the command is already completed by the fw cmpl code, + * we just return SUCCESS from here. This means that the abort + * succeeded. In the SCSI ML, since the timeout for command has + * happened, the completion wont actually complete the command + * and it will be considered as an aborted command + * + * The CMD_SP will not be cleared except while holding io_req_lock. + */ + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + goto fnic_abort_cmd_end; + } + + io_req->abts_done = &tm_done; + + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + goto wait_pending; + } + /* + * Command is still pending, need to abort it + * If the firmware completes the command after this point, + * the completion wont be done till mid-layer, since abort + * has already started. + */ + CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; + CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + + spin_unlock_irqrestore(io_lock, flags); + + /* + * Check readiness of the remote port. If the path to remote + * port is up, then send abts to the remote port to terminate + * the IO. Else, just locally terminate the IO in the firmware + */ + rport = starget_to_rport(scsi_target(sc->device)); + if (fc_remote_port_chkready(rport) == 0) + task_req = FCPIO_ITMF_ABT_TASK; + else + task_req = FCPIO_ITMF_ABT_TASK_TERM; + + /* Now queue the abort command to firmware */ + int_to_scsilun(sc->device->lun, &fc_lun); + + if (fnic_queue_abort_io_req(fnic, sc->request->tag, task_req, + fc_lun.scsi_lun, io_req)) { + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (io_req) + io_req->abts_done = NULL; + spin_unlock_irqrestore(io_lock, flags); + ret = FAILED; + goto fnic_abort_cmd_end; + } + + /* + * We queued an abort IO, wait for its completion. + * Once the firmware completes the abort command, it will + * wake up this thread. + */ + wait_pending: + wait_for_completion_timeout(&tm_done, + msecs_to_jiffies + (2 * fnic->config.ra_tov + + fnic->config.ed_tov)); + + /* Check the abort status */ + spin_lock_irqsave(io_lock, flags); + + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + ret = FAILED; + goto fnic_abort_cmd_end; + } + io_req->abts_done = NULL; + + /* fw did not complete abort, timed out */ + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + ret = FAILED; + goto fnic_abort_cmd_end; + } + + /* + * firmware completed the abort, check the status, + * free the io_req irrespective of failure or success + */ + if (CMD_ABTS_STATUS(sc) != FCPIO_SUCCESS) + ret = FAILED; + + CMD_SP(sc) = NULL; + + spin_unlock_irqrestore(io_lock, flags); + + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + +fnic_abort_cmd_end: + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Returning from abort cmd %s\n", + (ret == SUCCESS) ? + "SUCCESS" : "FAILED"); + return ret; +} + +static inline int fnic_queue_dr_io_req(struct fnic *fnic, + struct scsi_cmnd *sc, + struct fnic_io_req *io_req) +{ + struct vnic_wq_copy *wq = &fnic->wq_copy[0]; + struct scsi_lun fc_lun; + int ret = 0; + unsigned long intr_flags; + + spin_lock_irqsave(&fnic->wq_copy_lock[0], intr_flags); + + if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) + free_wq_copy_descs(fnic, wq); + + if (!vnic_wq_copy_desc_avail(wq)) { + ret = -EAGAIN; + goto lr_io_req_end; + } + + /* fill in the lun info */ + int_to_scsilun(sc->device->lun, &fc_lun); + + fnic_queue_wq_copy_desc_itmf(wq, sc->request->tag | FNIC_TAG_DEV_RST, + 0, FCPIO_ITMF_LUN_RESET, SCSI_NO_TAG, + fc_lun.scsi_lun, io_req->port_id, + fnic->config.ra_tov, fnic->config.ed_tov); + +lr_io_req_end: + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags); + + return ret; +} + +/* + * Clean up any pending aborts on the lun + * For each outstanding IO on this lun, whose abort is not completed by fw, + * issue a local abort. Wait for abort to complete. Return 0 if all commands + * successfully aborted, 1 otherwise + */ +static int fnic_clean_pending_aborts(struct fnic *fnic, + struct scsi_cmnd *lr_sc) +{ + int tag; + struct fnic_io_req *io_req; + spinlock_t *io_lock; + unsigned long flags; + int ret = 0; + struct scsi_cmnd *sc; + struct fc_rport *rport; + struct scsi_lun fc_lun; + struct scsi_device *lun_dev = lr_sc->device; + DECLARE_COMPLETION_ONSTACK(tm_done); + + for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + sc = scsi_host_find_tag(fnic->lport->host, tag); + /* + * ignore this lun reset cmd or cmds that do not belong to + * this lun + */ + if (!sc || sc == lr_sc || sc->device != lun_dev) + continue; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + + io_req = (struct fnic_io_req *)CMD_SP(sc); + + if (!io_req || sc->device != lun_dev) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + + /* + * Found IO that is still pending with firmware and + * belongs to the LUN that we are resetting + */ + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Found IO in %s on lun\n", + fnic_ioreq_state_to_str(CMD_STATE(sc))); + + BUG_ON(CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING); + + CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + io_req->abts_done = &tm_done; + spin_unlock_irqrestore(io_lock, flags); + + /* Now queue the abort command to firmware */ + int_to_scsilun(sc->device->lun, &fc_lun); + rport = starget_to_rport(scsi_target(sc->device)); + + if (fnic_queue_abort_io_req(fnic, tag, + FCPIO_ITMF_ABT_TASK_TERM, + fc_lun.scsi_lun, io_req)) { + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (io_req) + io_req->abts_done = NULL; + spin_unlock_irqrestore(io_lock, flags); + ret = 1; + goto clean_pending_aborts_end; + } + + wait_for_completion_timeout(&tm_done, + msecs_to_jiffies + (fnic->config.ed_tov)); + + /* Recheck cmd state to check if it is now aborted */ + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + ret = 1; + goto clean_pending_aborts_end; + } + + io_req->abts_done = NULL; + + /* if abort is still pending with fw, fail */ + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + ret = 1; + goto clean_pending_aborts_end; + } + CMD_SP(sc) = NULL; + spin_unlock_irqrestore(io_lock, flags); + + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + } + +clean_pending_aborts_end: + return ret; +} + +/* + * SCSI Eh thread issues a Lun Reset when one or more commands on a LUN + * fail to get aborted. It calls driver's eh_device_reset with a SCSI command + * on the LUN. + */ +int fnic_device_reset(struct scsi_cmnd *sc) +{ + struct fc_lport *lp; + struct fnic *fnic; + struct fnic_io_req *io_req; + struct fc_rport *rport; + int status; + int ret = FAILED; + spinlock_t *io_lock; + unsigned long flags; + DECLARE_COMPLETION_ONSTACK(tm_done); + + /* Wait for rport to unblock */ + fnic_block_error_handler(sc); + + /* Get local-port, check ready and link up */ + lp = shost_priv(sc->device->host); + + fnic = lport_priv(lp); + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "Device reset called FCID 0x%x, LUN 0x%x\n", + (starget_to_rport(scsi_target(sc->device)))->port_id, + sc->device->lun); + + + if (lp->state != LPORT_ST_READY || !(lp->link_up)) + goto fnic_device_reset_end; + + /* Check if remote port up */ + rport = starget_to_rport(scsi_target(sc->device)); + if (fc_remote_port_chkready(rport)) + goto fnic_device_reset_end; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + + /* + * If there is a io_req attached to this command, then use it, + * else allocate a new one. + */ + if (!io_req) { + io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + goto fnic_device_reset_end; + } + memset(io_req, 0, sizeof(*io_req)); + io_req->port_id = rport->port_id; + CMD_SP(sc) = (char *)io_req; + } + io_req->dr_done = &tm_done; + CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING; + CMD_LR_STATUS(sc) = FCPIO_INVALID_CODE; + spin_unlock_irqrestore(io_lock, flags); + + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "TAG %d\n", + sc->request->tag); + + /* + * issue the device reset, if enqueue failed, clean up the ioreq + * and break assoc with scsi cmd + */ + if (fnic_queue_dr_io_req(fnic, sc, io_req)) { + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (io_req) + io_req->dr_done = NULL; + goto fnic_device_reset_clean; + } + + /* + * Wait on the local completion for LUN reset. The io_req may be + * freed while we wait since we hold no lock. + */ + wait_for_completion_timeout(&tm_done, + msecs_to_jiffies(FNIC_LUN_RESET_TIMEOUT)); + + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + goto fnic_device_reset_end; + } + io_req->dr_done = NULL; + + status = CMD_LR_STATUS(sc); + spin_unlock_irqrestore(io_lock, flags); + + /* + * If lun reset not completed, bail out with failed. io_req + * gets cleaned up during higher levels of EH + */ + if (status == FCPIO_INVALID_CODE) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Device reset timed out\n"); + goto fnic_device_reset_end; + } + + /* Completed, but not successful, clean up the io_req, return fail */ + if (status != FCPIO_SUCCESS) { + spin_lock_irqsave(io_lock, flags); + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "Device reset completed - failed\n"); + io_req = (struct fnic_io_req *)CMD_SP(sc); + goto fnic_device_reset_clean; + } + + /* + * Clean up any aborts on this lun that have still not + * completed. If any of these fail, then LUN reset fails. + * clean_pending_aborts cleans all cmds on this lun except + * the lun reset cmd. If all cmds get cleaned, the lun reset + * succeeds + */ + if (fnic_clean_pending_aborts(fnic, sc)) { + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Device reset failed" + " since could not abort all IOs\n"); + goto fnic_device_reset_clean; + } + + /* Clean lun reset command */ + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (io_req) + /* Completed, and successful */ + ret = SUCCESS; + +fnic_device_reset_clean: + if (io_req) + CMD_SP(sc) = NULL; + + spin_unlock_irqrestore(io_lock, flags); + + if (io_req) { + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + } + +fnic_device_reset_end: + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Returning from device reset %s\n", + (ret == SUCCESS) ? + "SUCCESS" : "FAILED"); + return ret; +} + +/* Clean up all IOs, clean up libFC local port */ +int fnic_reset(struct Scsi_Host *shost) +{ + struct fc_lport *lp; + struct fnic *fnic; + int ret = SUCCESS; + + lp = shost_priv(shost); + fnic = lport_priv(lp); + + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_reset called\n"); + + /* + * Reset local port, this will clean up libFC exchanges, + * reset remote port sessions, and if link is up, begin flogi + */ + if (lp->tt.lport_reset(lp)) + ret = FAILED; + + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Returning from fnic reset %s\n", + (ret == SUCCESS) ? + "SUCCESS" : "FAILED"); + + return ret; +} + +/* + * SCSI Error handling calls driver's eh_host_reset if all prior + * error handling levels return FAILED. If host reset completes + * successfully, and if link is up, then Fabric login begins. + * + * Host Reset is the highest level of error recovery. If this fails, then + * host is offlined by SCSI. + * + */ +int fnic_host_reset(struct scsi_cmnd *sc) +{ + int ret; + unsigned long wait_host_tmo; + struct Scsi_Host *shost = sc->device->host; + struct fc_lport *lp = shost_priv(shost); + + /* + * If fnic_reset is successful, wait for fabric login to complete + * scsi-ml tries to send a TUR to every device if host reset is + * successful, so before returning to scsi, fabric should be up + */ + ret = fnic_reset(shost); + if (ret == SUCCESS) { + wait_host_tmo = jiffies + FNIC_HOST_RESET_SETTLE_TIME * HZ; + ret = FAILED; + while (time_before(jiffies, wait_host_tmo)) { + if ((lp->state == LPORT_ST_READY) && + (lp->link_up)) { + ret = SUCCESS; + break; + } + ssleep(1); + } + } + + return ret; +} + +/* + * This fxn is called from libFC when host is removed + */ +void fnic_scsi_abort_io(struct fc_lport *lp) +{ + int err = 0; + unsigned long flags; + enum fnic_state old_state; + struct fnic *fnic = lport_priv(lp); + DECLARE_COMPLETION_ONSTACK(remove_wait); + + /* Issue firmware reset for fnic, wait for reset to complete */ + spin_lock_irqsave(&fnic->fnic_lock, flags); + fnic->remove_wait = &remove_wait; + old_state = fnic->state; + fnic->state = FNIC_IN_FC_TRANS_ETH_MODE; + vnic_dev_del_addr(fnic->vdev, fnic->data_src_addr); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + err = fnic_fw_reset_handler(fnic); + if (err) { + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->state == FNIC_IN_FC_TRANS_ETH_MODE) + fnic->state = old_state; + fnic->remove_wait = NULL; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + return; + } + + /* Wait for firmware reset to complete */ + wait_for_completion_timeout(&remove_wait, + msecs_to_jiffies(FNIC_RMDEVICE_TIMEOUT)); + + spin_lock_irqsave(&fnic->fnic_lock, flags); + fnic->remove_wait = NULL; + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_scsi_abort_io %s\n", + (fnic->state == FNIC_IN_ETH_MODE) ? + "SUCCESS" : "FAILED"); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + +} + +/* + * This fxn called from libFC to clean up driver IO state on link down + */ +void fnic_scsi_cleanup(struct fc_lport *lp) +{ + unsigned long flags; + enum fnic_state old_state; + struct fnic *fnic = lport_priv(lp); + + /* issue fw reset */ + spin_lock_irqsave(&fnic->fnic_lock, flags); + old_state = fnic->state; + fnic->state = FNIC_IN_FC_TRANS_ETH_MODE; + vnic_dev_del_addr(fnic->vdev, fnic->data_src_addr); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (fnic_fw_reset_handler(fnic)) { + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->state == FNIC_IN_FC_TRANS_ETH_MODE) + fnic->state = old_state; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + } + +} + +void fnic_empty_scsi_cleanup(struct fc_lport *lp) +{ +} + +void fnic_exch_mgr_reset(struct fc_lport *lp, u32 sid, u32 did) +{ + struct fnic *fnic = lport_priv(lp); + + /* Non-zero sid, nothing to do */ + if (sid) + goto call_fc_exch_mgr_reset; + + if (did) { + fnic_rport_exch_reset(fnic, did); + goto call_fc_exch_mgr_reset; + } + + /* + * sid = 0, did = 0 + * link down or device being removed + */ + if (!fnic->in_remove) + fnic_scsi_cleanup(lp); + else + fnic_scsi_abort_io(lp); + + /* call libFC exch mgr reset to reset its exchanges */ +call_fc_exch_mgr_reset: + fc_exch_mgr_reset(lp, sid, did); + +} diff --git a/drivers/scsi/fnic/rq_enet_desc.h b/drivers/scsi/fnic/rq_enet_desc.h new file mode 100644 index 000000000000..92e80ae6b725 --- /dev/null +++ b/drivers/scsi/fnic/rq_enet_desc.h @@ -0,0 +1,58 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _RQ_ENET_DESC_H_ +#define _RQ_ENET_DESC_H_ + +/* Ethernet receive queue descriptor: 16B */ +struct rq_enet_desc { + __le64 address; + __le16 length_type; + u8 reserved[6]; +}; + +enum rq_enet_type_types { + RQ_ENET_TYPE_ONLY_SOP = 0, + RQ_ENET_TYPE_NOT_SOP = 1, + RQ_ENET_TYPE_RESV2 = 2, + RQ_ENET_TYPE_RESV3 = 3, +}; + +#define RQ_ENET_ADDR_BITS 64 +#define RQ_ENET_LEN_BITS 14 +#define RQ_ENET_LEN_MASK ((1 << RQ_ENET_LEN_BITS) - 1) +#define RQ_ENET_TYPE_BITS 2 +#define RQ_ENET_TYPE_MASK ((1 << RQ_ENET_TYPE_BITS) - 1) + +static inline void rq_enet_desc_enc(struct rq_enet_desc *desc, + u64 address, u8 type, u16 length) +{ + desc->address = cpu_to_le64(address); + desc->length_type = cpu_to_le16((length & RQ_ENET_LEN_MASK) | + ((type & RQ_ENET_TYPE_MASK) << RQ_ENET_LEN_BITS)); +} + +static inline void rq_enet_desc_dec(struct rq_enet_desc *desc, + u64 *address, u8 *type, u16 *length) +{ + *address = le64_to_cpu(desc->address); + *length = le16_to_cpu(desc->length_type) & RQ_ENET_LEN_MASK; + *type = (u8)((le16_to_cpu(desc->length_type) >> RQ_ENET_LEN_BITS) & + RQ_ENET_TYPE_MASK); +} + +#endif /* _RQ_ENET_DESC_H_ */ diff --git a/drivers/scsi/fnic/vnic_cq.c b/drivers/scsi/fnic/vnic_cq.c new file mode 100644 index 000000000000..c5db32eda5ef --- /dev/null +++ b/drivers/scsi/fnic/vnic_cq.c @@ -0,0 +1,85 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include "vnic_dev.h" +#include "vnic_cq.h" + +void vnic_cq_free(struct vnic_cq *cq) +{ + vnic_dev_free_desc_ring(cq->vdev, &cq->ring); + + cq->ctrl = NULL; +} + +int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, + unsigned int desc_count, unsigned int desc_size) +{ + int err; + + cq->index = index; + cq->vdev = vdev; + + cq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_CQ, index); + if (!cq->ctrl) { + printk(KERN_ERR "Failed to hook CQ[%d] resource\n", index); + return -EINVAL; + } + + err = vnic_dev_alloc_desc_ring(vdev, &cq->ring, desc_count, desc_size); + if (err) + return err; + + return 0; +} + +void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable, + unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail, + unsigned int cq_tail_color, unsigned int interrupt_enable, + unsigned int cq_entry_enable, unsigned int cq_message_enable, + unsigned int interrupt_offset, u64 cq_message_addr) +{ + u64 paddr; + + paddr = (u64)cq->ring.base_addr | VNIC_PADDR_TARGET; + writeq(paddr, &cq->ctrl->ring_base); + iowrite32(cq->ring.desc_count, &cq->ctrl->ring_size); + iowrite32(flow_control_enable, &cq->ctrl->flow_control_enable); + iowrite32(color_enable, &cq->ctrl->color_enable); + iowrite32(cq_head, &cq->ctrl->cq_head); + iowrite32(cq_tail, &cq->ctrl->cq_tail); + iowrite32(cq_tail_color, &cq->ctrl->cq_tail_color); + iowrite32(interrupt_enable, &cq->ctrl->interrupt_enable); + iowrite32(cq_entry_enable, &cq->ctrl->cq_entry_enable); + iowrite32(cq_message_enable, &cq->ctrl->cq_message_enable); + iowrite32(interrupt_offset, &cq->ctrl->interrupt_offset); + writeq(cq_message_addr, &cq->ctrl->cq_message_addr); +} + +void vnic_cq_clean(struct vnic_cq *cq) +{ + cq->to_clean = 0; + cq->last_color = 0; + + iowrite32(0, &cq->ctrl->cq_head); + iowrite32(0, &cq->ctrl->cq_tail); + iowrite32(1, &cq->ctrl->cq_tail_color); + + vnic_dev_clear_desc_ring(&cq->ring); +} diff --git a/drivers/scsi/fnic/vnic_cq.h b/drivers/scsi/fnic/vnic_cq.h new file mode 100644 index 000000000000..4ede6809fb1e --- /dev/null +++ b/drivers/scsi/fnic/vnic_cq.h @@ -0,0 +1,121 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_CQ_H_ +#define _VNIC_CQ_H_ + +#include "cq_desc.h" +#include "vnic_dev.h" + +/* + * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth + * Driver) when both are built with CONFIG options =y + */ +#define vnic_cq_service fnic_cq_service +#define vnic_cq_free fnic_cq_free +#define vnic_cq_alloc fnic_cq_alloc +#define vnic_cq_init fnic_cq_init +#define vnic_cq_clean fnic_cq_clean + +/* Completion queue control */ +struct vnic_cq_ctrl { + u64 ring_base; /* 0x00 */ + u32 ring_size; /* 0x08 */ + u32 pad0; + u32 flow_control_enable; /* 0x10 */ + u32 pad1; + u32 color_enable; /* 0x18 */ + u32 pad2; + u32 cq_head; /* 0x20 */ + u32 pad3; + u32 cq_tail; /* 0x28 */ + u32 pad4; + u32 cq_tail_color; /* 0x30 */ + u32 pad5; + u32 interrupt_enable; /* 0x38 */ + u32 pad6; + u32 cq_entry_enable; /* 0x40 */ + u32 pad7; + u32 cq_message_enable; /* 0x48 */ + u32 pad8; + u32 interrupt_offset; /* 0x50 */ + u32 pad9; + u64 cq_message_addr; /* 0x58 */ + u32 pad10; +}; + +struct vnic_cq { + unsigned int index; + struct vnic_dev *vdev; + struct vnic_cq_ctrl __iomem *ctrl; /* memory-mapped */ + struct vnic_dev_ring ring; + unsigned int to_clean; + unsigned int last_color; +}; + +static inline unsigned int vnic_cq_service(struct vnic_cq *cq, + unsigned int work_to_do, + int (*q_service)(struct vnic_dev *vdev, struct cq_desc *cq_desc, + u8 type, u16 q_number, u16 completed_index, void *opaque), + void *opaque) +{ + struct cq_desc *cq_desc; + unsigned int work_done = 0; + u16 q_number, completed_index; + u8 type, color; + + cq_desc = (struct cq_desc *)((u8 *)cq->ring.descs + + cq->ring.desc_size * cq->to_clean); + cq_desc_dec(cq_desc, &type, &color, + &q_number, &completed_index); + + while (color != cq->last_color) { + + if ((*q_service)(cq->vdev, cq_desc, type, + q_number, completed_index, opaque)) + break; + + cq->to_clean++; + if (cq->to_clean == cq->ring.desc_count) { + cq->to_clean = 0; + cq->last_color = cq->last_color ? 0 : 1; + } + + cq_desc = (struct cq_desc *)((u8 *)cq->ring.descs + + cq->ring.desc_size * cq->to_clean); + cq_desc_dec(cq_desc, &type, &color, + &q_number, &completed_index); + + work_done++; + if (work_done >= work_to_do) + break; + } + + return work_done; +} + +void vnic_cq_free(struct vnic_cq *cq); +int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, + unsigned int desc_count, unsigned int desc_size); +void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable, + unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail, + unsigned int cq_tail_color, unsigned int interrupt_enable, + unsigned int cq_entry_enable, unsigned int message_enable, + unsigned int interrupt_offset, u64 message_addr); +void vnic_cq_clean(struct vnic_cq *cq); + +#endif /* _VNIC_CQ_H_ */ diff --git a/drivers/scsi/fnic/vnic_cq_copy.h b/drivers/scsi/fnic/vnic_cq_copy.h new file mode 100644 index 000000000000..7901ce255a81 --- /dev/null +++ b/drivers/scsi/fnic/vnic_cq_copy.h @@ -0,0 +1,62 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_CQ_COPY_H_ +#define _VNIC_CQ_COPY_H_ + +#include "fcpio.h" + +static inline unsigned int vnic_cq_copy_service( + struct vnic_cq *cq, + int (*q_service)(struct vnic_dev *vdev, + unsigned int index, + struct fcpio_fw_req *desc), + unsigned int work_to_do) + +{ + struct fcpio_fw_req *desc; + unsigned int work_done = 0; + u8 color; + + desc = (struct fcpio_fw_req *)((u8 *)cq->ring.descs + + cq->ring.desc_size * cq->to_clean); + fcpio_color_dec(desc, &color); + + while (color != cq->last_color) { + + if ((*q_service)(cq->vdev, cq->index, desc)) + break; + + cq->to_clean++; + if (cq->to_clean == cq->ring.desc_count) { + cq->to_clean = 0; + cq->last_color = cq->last_color ? 0 : 1; + } + + desc = (struct fcpio_fw_req *)((u8 *)cq->ring.descs + + cq->ring.desc_size * cq->to_clean); + fcpio_color_dec(desc, &color); + + work_done++; + if (work_done >= work_to_do) + break; + } + + return work_done; +} + +#endif /* _VNIC_CQ_COPY_H_ */ diff --git a/drivers/scsi/fnic/vnic_dev.c b/drivers/scsi/fnic/vnic_dev.c new file mode 100644 index 000000000000..566770645086 --- /dev/null +++ b/drivers/scsi/fnic/vnic_dev.c @@ -0,0 +1,690 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include "vnic_resource.h" +#include "vnic_devcmd.h" +#include "vnic_dev.h" +#include "vnic_stats.h" + +struct vnic_res { + void __iomem *vaddr; + unsigned int count; +}; + +struct vnic_dev { + void *priv; + struct pci_dev *pdev; + struct vnic_res res[RES_TYPE_MAX]; + enum vnic_dev_intr_mode intr_mode; + struct vnic_devcmd __iomem *devcmd; + struct vnic_devcmd_notify *notify; + struct vnic_devcmd_notify notify_copy; + dma_addr_t notify_pa; + u32 *linkstatus; + dma_addr_t linkstatus_pa; + struct vnic_stats *stats; + dma_addr_t stats_pa; + struct vnic_devcmd_fw_info *fw_info; + dma_addr_t fw_info_pa; +}; + +#define VNIC_MAX_RES_HDR_SIZE \ + (sizeof(struct vnic_resource_header) + \ + sizeof(struct vnic_resource) * RES_TYPE_MAX) +#define VNIC_RES_STRIDE 128 + +void *vnic_dev_priv(struct vnic_dev *vdev) +{ + return vdev->priv; +} + +static int vnic_dev_discover_res(struct vnic_dev *vdev, + struct vnic_dev_bar *bar) +{ + struct vnic_resource_header __iomem *rh; + struct vnic_resource __iomem *r; + u8 type; + + if (bar->len < VNIC_MAX_RES_HDR_SIZE) { + printk(KERN_ERR "vNIC BAR0 res hdr length error\n"); + return -EINVAL; + } + + rh = bar->vaddr; + if (!rh) { + printk(KERN_ERR "vNIC BAR0 res hdr not mem-mapped\n"); + return -EINVAL; + } + + if (ioread32(&rh->magic) != VNIC_RES_MAGIC || + ioread32(&rh->version) != VNIC_RES_VERSION) { + printk(KERN_ERR "vNIC BAR0 res magic/version error " + "exp (%lx/%lx) curr (%x/%x)\n", + VNIC_RES_MAGIC, VNIC_RES_VERSION, + ioread32(&rh->magic), ioread32(&rh->version)); + return -EINVAL; + } + + r = (struct vnic_resource __iomem *)(rh + 1); + + while ((type = ioread8(&r->type)) != RES_TYPE_EOL) { + + u8 bar_num = ioread8(&r->bar); + u32 bar_offset = ioread32(&r->bar_offset); + u32 count = ioread32(&r->count); + u32 len; + + r++; + + if (bar_num != 0) /* only mapping in BAR0 resources */ + continue; + + switch (type) { + case RES_TYPE_WQ: + case RES_TYPE_RQ: + case RES_TYPE_CQ: + case RES_TYPE_INTR_CTRL: + /* each count is stride bytes long */ + len = count * VNIC_RES_STRIDE; + if (len + bar_offset > bar->len) { + printk(KERN_ERR "vNIC BAR0 resource %d " + "out-of-bounds, offset 0x%x + " + "size 0x%x > bar len 0x%lx\n", + type, bar_offset, + len, + bar->len); + return -EINVAL; + } + break; + case RES_TYPE_INTR_PBA_LEGACY: + case RES_TYPE_DEVCMD: + len = count; + break; + default: + continue; + } + + vdev->res[type].count = count; + vdev->res[type].vaddr = (char __iomem *)bar->vaddr + bar_offset; + } + + return 0; +} + +unsigned int vnic_dev_get_res_count(struct vnic_dev *vdev, + enum vnic_res_type type) +{ + return vdev->res[type].count; +} + +void __iomem *vnic_dev_get_res(struct vnic_dev *vdev, enum vnic_res_type type, + unsigned int index) +{ + if (!vdev->res[type].vaddr) + return NULL; + + switch (type) { + case RES_TYPE_WQ: + case RES_TYPE_RQ: + case RES_TYPE_CQ: + case RES_TYPE_INTR_CTRL: + return (char __iomem *)vdev->res[type].vaddr + + index * VNIC_RES_STRIDE; + default: + return (char __iomem *)vdev->res[type].vaddr; + } +} + +unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring, + unsigned int desc_count, + unsigned int desc_size) +{ + /* The base address of the desc rings must be 512 byte aligned. + * Descriptor count is aligned to groups of 32 descriptors. A + * count of 0 means the maximum 4096 descriptors. Descriptor + * size is aligned to 16 bytes. + */ + + unsigned int count_align = 32; + unsigned int desc_align = 16; + + ring->base_align = 512; + + if (desc_count == 0) + desc_count = 4096; + + ring->desc_count = ALIGN(desc_count, count_align); + + ring->desc_size = ALIGN(desc_size, desc_align); + + ring->size = ring->desc_count * ring->desc_size; + ring->size_unaligned = ring->size + ring->base_align; + + return ring->size_unaligned; +} + +void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring) +{ + memset(ring->descs, 0, ring->size); +} + +int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring, + unsigned int desc_count, unsigned int desc_size) +{ + vnic_dev_desc_ring_size(ring, desc_count, desc_size); + + ring->descs_unaligned = pci_alloc_consistent(vdev->pdev, + ring->size_unaligned, + &ring->base_addr_unaligned); + + if (!ring->descs_unaligned) { + printk(KERN_ERR + "Failed to allocate ring (size=%d), aborting\n", + (int)ring->size); + return -ENOMEM; + } + + ring->base_addr = ALIGN(ring->base_addr_unaligned, + ring->base_align); + ring->descs = (u8 *)ring->descs_unaligned + + (ring->base_addr - ring->base_addr_unaligned); + + vnic_dev_clear_desc_ring(ring); + + ring->desc_avail = ring->desc_count - 1; + + return 0; +} + +void vnic_dev_free_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring) +{ + if (ring->descs) { + pci_free_consistent(vdev->pdev, + ring->size_unaligned, + ring->descs_unaligned, + ring->base_addr_unaligned); + ring->descs = NULL; + } +} + +int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, + u64 *a0, u64 *a1, int wait) +{ + struct vnic_devcmd __iomem *devcmd = vdev->devcmd; + int delay; + u32 status; + int dev_cmd_err[] = { + /* convert from fw's version of error.h to host's version */ + 0, /* ERR_SUCCESS */ + EINVAL, /* ERR_EINVAL */ + EFAULT, /* ERR_EFAULT */ + EPERM, /* ERR_EPERM */ + EBUSY, /* ERR_EBUSY */ + }; + int err; + + status = ioread32(&devcmd->status); + if (status & STAT_BUSY) { + printk(KERN_ERR "Busy devcmd %d\n", _CMD_N(cmd)); + return -EBUSY; + } + + if (_CMD_DIR(cmd) & _CMD_DIR_WRITE) { + writeq(*a0, &devcmd->args[0]); + writeq(*a1, &devcmd->args[1]); + wmb(); + } + + iowrite32(cmd, &devcmd->cmd); + + if ((_CMD_FLAGS(cmd) & _CMD_FLAGS_NOWAIT)) + return 0; + + for (delay = 0; delay < wait; delay++) { + + udelay(100); + + status = ioread32(&devcmd->status); + if (!(status & STAT_BUSY)) { + + if (status & STAT_ERROR) { + err = dev_cmd_err[(int)readq(&devcmd->args[0])]; + printk(KERN_ERR "Error %d devcmd %d\n", + err, _CMD_N(cmd)); + return -err; + } + + if (_CMD_DIR(cmd) & _CMD_DIR_READ) { + rmb(); + *a0 = readq(&devcmd->args[0]); + *a1 = readq(&devcmd->args[1]); + } + + return 0; + } + } + + printk(KERN_ERR "Timedout devcmd %d\n", _CMD_N(cmd)); + return -ETIMEDOUT; +} + +int vnic_dev_fw_info(struct vnic_dev *vdev, + struct vnic_devcmd_fw_info **fw_info) +{ + u64 a0, a1 = 0; + int wait = 1000; + int err = 0; + + if (!vdev->fw_info) { + vdev->fw_info = pci_alloc_consistent(vdev->pdev, + sizeof(struct vnic_devcmd_fw_info), + &vdev->fw_info_pa); + if (!vdev->fw_info) + return -ENOMEM; + + a0 = vdev->fw_info_pa; + + /* only get fw_info once and cache it */ + err = vnic_dev_cmd(vdev, CMD_MCPU_FW_INFO, &a0, &a1, wait); + } + + *fw_info = vdev->fw_info; + + return err; +} + +int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, unsigned int size, + void *value) +{ + u64 a0, a1; + int wait = 1000; + int err; + + a0 = offset; + a1 = size; + + err = vnic_dev_cmd(vdev, CMD_DEV_SPEC, &a0, &a1, wait); + + switch (size) { + case 1: + *(u8 *)value = (u8)a0; + break; + case 2: + *(u16 *)value = (u16)a0; + break; + case 4: + *(u32 *)value = (u32)a0; + break; + case 8: + *(u64 *)value = a0; + break; + default: + BUG(); + break; + } + + return err; +} + +int vnic_dev_stats_clear(struct vnic_dev *vdev) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_STATS_CLEAR, &a0, &a1, wait); +} + +int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats) +{ + u64 a0, a1; + int wait = 1000; + + if (!vdev->stats) { + vdev->stats = pci_alloc_consistent(vdev->pdev, + sizeof(struct vnic_stats), &vdev->stats_pa); + if (!vdev->stats) + return -ENOMEM; + } + + *stats = vdev->stats; + a0 = vdev->stats_pa; + a1 = sizeof(struct vnic_stats); + + return vnic_dev_cmd(vdev, CMD_STATS_DUMP, &a0, &a1, wait); +} + +int vnic_dev_close(struct vnic_dev *vdev) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_CLOSE, &a0, &a1, wait); +} + +int vnic_dev_enable(struct vnic_dev *vdev) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_ENABLE, &a0, &a1, wait); +} + +int vnic_dev_disable(struct vnic_dev *vdev) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_DISABLE, &a0, &a1, wait); +} + +int vnic_dev_open(struct vnic_dev *vdev, int arg) +{ + u64 a0 = (u32)arg, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_OPEN, &a0, &a1, wait); +} + +int vnic_dev_open_done(struct vnic_dev *vdev, int *done) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + int err; + + *done = 0; + + err = vnic_dev_cmd(vdev, CMD_OPEN_STATUS, &a0, &a1, wait); + if (err) + return err; + + *done = (a0 == 0); + + return 0; +} + +int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg) +{ + u64 a0 = (u32)arg, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_SOFT_RESET, &a0, &a1, wait); +} + +int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + int err; + + *done = 0; + + err = vnic_dev_cmd(vdev, CMD_SOFT_RESET_STATUS, &a0, &a1, wait); + if (err) + return err; + + *done = (a0 == 0); + + return 0; +} + +int vnic_dev_hang_notify(struct vnic_dev *vdev) +{ + u64 a0, a1; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_HANG_NOTIFY, &a0, &a1, wait); +} + +int vnic_dev_mac_addr(struct vnic_dev *vdev, u8 *mac_addr) +{ + u64 a0, a1; + int wait = 1000; + int err, i; + + for (i = 0; i < ETH_ALEN; i++) + mac_addr[i] = 0; + + err = vnic_dev_cmd(vdev, CMD_MAC_ADDR, &a0, &a1, wait); + if (err) + return err; + + for (i = 0; i < ETH_ALEN; i++) + mac_addr[i] = ((u8 *)&a0)[i]; + + return 0; +} + +void vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, + int broadcast, int promisc, int allmulti) +{ + u64 a0, a1 = 0; + int wait = 1000; + int err; + + a0 = (directed ? CMD_PFILTER_DIRECTED : 0) | + (multicast ? CMD_PFILTER_MULTICAST : 0) | + (broadcast ? CMD_PFILTER_BROADCAST : 0) | + (promisc ? CMD_PFILTER_PROMISCUOUS : 0) | + (allmulti ? CMD_PFILTER_ALL_MULTICAST : 0); + + err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER, &a0, &a1, wait); + if (err) + printk(KERN_ERR "Can't set packet filter\n"); +} + +void vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + int err; + int i; + + for (i = 0; i < ETH_ALEN; i++) + ((u8 *)&a0)[i] = addr[i]; + + err = vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a0, &a1, wait); + if (err) + printk(KERN_ERR + "Can't add addr [%02x:%02x:%02x:%02x:%02x:%02x], %d\n", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], + err); +} + +void vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + int err; + int i; + + for (i = 0; i < ETH_ALEN; i++) + ((u8 *)&a0)[i] = addr[i]; + + err = vnic_dev_cmd(vdev, CMD_ADDR_DEL, &a0, &a1, wait); + if (err) + printk(KERN_ERR + "Can't del addr [%02x:%02x:%02x:%02x:%02x:%02x], %d\n", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], + err); +} + +int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr) +{ + u64 a0, a1; + int wait = 1000; + + if (!vdev->notify) { + vdev->notify = pci_alloc_consistent(vdev->pdev, + sizeof(struct vnic_devcmd_notify), + &vdev->notify_pa); + if (!vdev->notify) + return -ENOMEM; + } + + a0 = vdev->notify_pa; + a1 = ((u64)intr << 32) & 0x0000ffff00000000ULL; + a1 += sizeof(struct vnic_devcmd_notify); + + return vnic_dev_cmd(vdev, CMD_NOTIFY, &a0, &a1, wait); +} + +void vnic_dev_notify_unset(struct vnic_dev *vdev) +{ + u64 a0, a1; + int wait = 1000; + + a0 = 0; /* paddr = 0 to unset notify buffer */ + a1 = 0x0000ffff00000000ULL; /* intr num = -1 to unreg for intr */ + a1 += sizeof(struct vnic_devcmd_notify); + + vnic_dev_cmd(vdev, CMD_NOTIFY, &a0, &a1, wait); +} + +static int vnic_dev_notify_ready(struct vnic_dev *vdev) +{ + u32 *words; + unsigned int nwords = sizeof(struct vnic_devcmd_notify) / 4; + unsigned int i; + u32 csum; + + if (!vdev->notify) + return 0; + + do { + csum = 0; + memcpy(&vdev->notify_copy, vdev->notify, + sizeof(struct vnic_devcmd_notify)); + words = (u32 *)&vdev->notify_copy; + for (i = 1; i < nwords; i++) + csum += words[i]; + } while (csum != words[0]); + + return 1; +} + +int vnic_dev_init(struct vnic_dev *vdev, int arg) +{ + u64 a0 = (u32)arg, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_INIT, &a0, &a1, wait); +} + +int vnic_dev_link_status(struct vnic_dev *vdev) +{ + if (vdev->linkstatus) + return *vdev->linkstatus; + + if (!vnic_dev_notify_ready(vdev)) + return 0; + + return vdev->notify_copy.link_state; +} + +u32 vnic_dev_port_speed(struct vnic_dev *vdev) +{ + if (!vnic_dev_notify_ready(vdev)) + return 0; + + return vdev->notify_copy.port_speed; +} + +u32 vnic_dev_msg_lvl(struct vnic_dev *vdev) +{ + if (!vnic_dev_notify_ready(vdev)) + return 0; + + return vdev->notify_copy.msglvl; +} + +u32 vnic_dev_mtu(struct vnic_dev *vdev) +{ + if (!vnic_dev_notify_ready(vdev)) + return 0; + + return vdev->notify_copy.mtu; +} + +u32 vnic_dev_link_down_cnt(struct vnic_dev *vdev) +{ + if (!vnic_dev_notify_ready(vdev)) + return 0; + + return vdev->notify_copy.link_down_cnt; +} + +void vnic_dev_set_intr_mode(struct vnic_dev *vdev, + enum vnic_dev_intr_mode intr_mode) +{ + vdev->intr_mode = intr_mode; +} + +enum vnic_dev_intr_mode vnic_dev_get_intr_mode( + struct vnic_dev *vdev) +{ + return vdev->intr_mode; +} + +void vnic_dev_unregister(struct vnic_dev *vdev) +{ + if (vdev) { + if (vdev->notify) + pci_free_consistent(vdev->pdev, + sizeof(struct vnic_devcmd_notify), + vdev->notify, + vdev->notify_pa); + if (vdev->linkstatus) + pci_free_consistent(vdev->pdev, + sizeof(u32), + vdev->linkstatus, + vdev->linkstatus_pa); + if (vdev->stats) + pci_free_consistent(vdev->pdev, + sizeof(struct vnic_dev), + vdev->stats, vdev->stats_pa); + if (vdev->fw_info) + pci_free_consistent(vdev->pdev, + sizeof(struct vnic_devcmd_fw_info), + vdev->fw_info, vdev->fw_info_pa); + kfree(vdev); + } +} + +struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev, + void *priv, struct pci_dev *pdev, struct vnic_dev_bar *bar) +{ + if (!vdev) { + vdev = kzalloc(sizeof(struct vnic_dev), GFP_KERNEL); + if (!vdev) + return NULL; + } + + vdev->priv = priv; + vdev->pdev = pdev; + + if (vnic_dev_discover_res(vdev, bar)) + goto err_out; + + vdev->devcmd = vnic_dev_get_res(vdev, RES_TYPE_DEVCMD, 0); + if (!vdev->devcmd) + goto err_out; + + return vdev; + +err_out: + vnic_dev_unregister(vdev); + return NULL; +} diff --git a/drivers/scsi/fnic/vnic_dev.h b/drivers/scsi/fnic/vnic_dev.h new file mode 100644 index 000000000000..f9935a8a5a09 --- /dev/null +++ b/drivers/scsi/fnic/vnic_dev.h @@ -0,0 +1,161 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_DEV_H_ +#define _VNIC_DEV_H_ + +#include "vnic_resource.h" +#include "vnic_devcmd.h" + +/* + * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth + * Driver) when both are built with CONFIG options =y + */ +#define vnic_dev_priv fnic_dev_priv +#define vnic_dev_get_res_count fnic_dev_get_res_count +#define vnic_dev_get_res fnic_dev_get_res +#define vnic_dev_desc_ring_size fnic_dev_desc_ring_siz +#define vnic_dev_clear_desc_ring fnic_dev_clear_desc_ring +#define vnic_dev_alloc_desc_ring fnic_dev_alloc_desc_ring +#define vnic_dev_free_desc_ring fnic_dev_free_desc_ring +#define vnic_dev_cmd fnic_dev_cmd +#define vnic_dev_fw_info fnic_dev_fw_info +#define vnic_dev_spec fnic_dev_spec +#define vnic_dev_stats_clear fnic_dev_stats_clear +#define vnic_dev_stats_dump fnic_dev_stats_dump +#define vnic_dev_hang_notify fnic_dev_hang_notify +#define vnic_dev_packet_filter fnic_dev_packet_filter +#define vnic_dev_add_addr fnic_dev_add_addr +#define vnic_dev_del_addr fnic_dev_del_addr +#define vnic_dev_mac_addr fnic_dev_mac_addr +#define vnic_dev_notify_set fnic_dev_notify_set +#define vnic_dev_notify_unset fnic_dev_notify_unset +#define vnic_dev_link_status fnic_dev_link_status +#define vnic_dev_port_speed fnic_dev_port_speed +#define vnic_dev_msg_lvl fnic_dev_msg_lvl +#define vnic_dev_mtu fnic_dev_mtu +#define vnic_dev_link_down_cnt fnic_dev_link_down_cnt +#define vnic_dev_close fnic_dev_close +#define vnic_dev_enable fnic_dev_enable +#define vnic_dev_disable fnic_dev_disable +#define vnic_dev_open fnic_dev_open +#define vnic_dev_open_done fnic_dev_open_done +#define vnic_dev_init fnic_dev_init +#define vnic_dev_soft_reset fnic_dev_soft_reset +#define vnic_dev_soft_reset_done fnic_dev_soft_reset_done +#define vnic_dev_set_intr_mode fnic_dev_set_intr_mode +#define vnic_dev_get_intr_mode fnic_dev_get_intr_mode +#define vnic_dev_unregister fnic_dev_unregister +#define vnic_dev_register fnic_dev_register + +#ifndef VNIC_PADDR_TARGET +#define VNIC_PADDR_TARGET 0x0000000000000000ULL +#endif + +#ifndef readq +static inline u64 readq(void __iomem *reg) +{ + return ((u64)readl(reg + 0x4UL) << 32) | (u64)readl(reg); +} + +static inline void writeq(u64 val, void __iomem *reg) +{ + writel(val & 0xffffffff, reg); + writel(val >> 32, reg + 0x4UL); +} +#endif + +enum vnic_dev_intr_mode { + VNIC_DEV_INTR_MODE_UNKNOWN, + VNIC_DEV_INTR_MODE_INTX, + VNIC_DEV_INTR_MODE_MSI, + VNIC_DEV_INTR_MODE_MSIX, +}; + +struct vnic_dev_bar { + void __iomem *vaddr; + dma_addr_t bus_addr; + unsigned long len; +}; + +struct vnic_dev_ring { + void *descs; + size_t size; + dma_addr_t base_addr; + size_t base_align; + void *descs_unaligned; + size_t size_unaligned; + dma_addr_t base_addr_unaligned; + unsigned int desc_size; + unsigned int desc_count; + unsigned int desc_avail; +}; + +struct vnic_dev; +struct vnic_stats; + +void *vnic_dev_priv(struct vnic_dev *vdev); +unsigned int vnic_dev_get_res_count(struct vnic_dev *vdev, + enum vnic_res_type type); +void __iomem *vnic_dev_get_res(struct vnic_dev *vdev, enum vnic_res_type type, + unsigned int index); +unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring, + unsigned int desc_count, + unsigned int desc_size); +void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring); +int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring, + unsigned int desc_count, unsigned int desc_size); +void vnic_dev_free_desc_ring(struct vnic_dev *vdev, + struct vnic_dev_ring *ring); +int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, + u64 *a0, u64 *a1, int wait); +int vnic_dev_fw_info(struct vnic_dev *vdev, + struct vnic_devcmd_fw_info **fw_info); +int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, + unsigned int size, void *value); +int vnic_dev_stats_clear(struct vnic_dev *vdev); +int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats); +int vnic_dev_hang_notify(struct vnic_dev *vdev); +void vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, + int broadcast, int promisc, int allmulti); +void vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr); +void vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr); +int vnic_dev_mac_addr(struct vnic_dev *vdev, u8 *mac_addr); +int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr); +void vnic_dev_notify_unset(struct vnic_dev *vdev); +int vnic_dev_link_status(struct vnic_dev *vdev); +u32 vnic_dev_port_speed(struct vnic_dev *vdev); +u32 vnic_dev_msg_lvl(struct vnic_dev *vdev); +u32 vnic_dev_mtu(struct vnic_dev *vdev); +u32 vnic_dev_link_down_cnt(struct vnic_dev *vdev); +int vnic_dev_close(struct vnic_dev *vdev); +int vnic_dev_enable(struct vnic_dev *vdev); +int vnic_dev_disable(struct vnic_dev *vdev); +int vnic_dev_open(struct vnic_dev *vdev, int arg); +int vnic_dev_open_done(struct vnic_dev *vdev, int *done); +int vnic_dev_init(struct vnic_dev *vdev, int arg); +int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg); +int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done); +void vnic_dev_set_intr_mode(struct vnic_dev *vdev, + enum vnic_dev_intr_mode intr_mode); +enum vnic_dev_intr_mode vnic_dev_get_intr_mode(struct vnic_dev *vdev); +void vnic_dev_unregister(struct vnic_dev *vdev); +struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev, + void *priv, struct pci_dev *pdev, + struct vnic_dev_bar *bar); + +#endif /* _VNIC_DEV_H_ */ diff --git a/drivers/scsi/fnic/vnic_devcmd.h b/drivers/scsi/fnic/vnic_devcmd.h new file mode 100644 index 000000000000..d62b9061bf12 --- /dev/null +++ b/drivers/scsi/fnic/vnic_devcmd.h @@ -0,0 +1,281 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_DEVCMD_H_ +#define _VNIC_DEVCMD_H_ + +#define _CMD_NBITS 14 +#define _CMD_VTYPEBITS 10 +#define _CMD_FLAGSBITS 6 +#define _CMD_DIRBITS 2 + +#define _CMD_NMASK ((1 << _CMD_NBITS)-1) +#define _CMD_VTYPEMASK ((1 << _CMD_VTYPEBITS)-1) +#define _CMD_FLAGSMASK ((1 << _CMD_FLAGSBITS)-1) +#define _CMD_DIRMASK ((1 << _CMD_DIRBITS)-1) + +#define _CMD_NSHIFT 0 +#define _CMD_VTYPESHIFT (_CMD_NSHIFT+_CMD_NBITS) +#define _CMD_FLAGSSHIFT (_CMD_VTYPESHIFT+_CMD_VTYPEBITS) +#define _CMD_DIRSHIFT (_CMD_FLAGSSHIFT+_CMD_FLAGSBITS) + +/* + * Direction bits (from host perspective). + */ +#define _CMD_DIR_NONE 0U +#define _CMD_DIR_WRITE 1U +#define _CMD_DIR_READ 2U +#define _CMD_DIR_RW (_CMD_DIR_WRITE | _CMD_DIR_READ) + +/* + * Flag bits. + */ +#define _CMD_FLAGS_NONE 0U +#define _CMD_FLAGS_NOWAIT 1U + +/* + * vNIC type bits. + */ +#define _CMD_VTYPE_NONE 0U +#define _CMD_VTYPE_ENET 1U +#define _CMD_VTYPE_FC 2U +#define _CMD_VTYPE_SCSI 4U +#define _CMD_VTYPE_ALL (_CMD_VTYPE_ENET | _CMD_VTYPE_FC | _CMD_VTYPE_SCSI) + +/* + * Used to create cmds.. +*/ +#define _CMDCF(dir, flags, vtype, nr) \ + (((dir) << _CMD_DIRSHIFT) | \ + ((flags) << _CMD_FLAGSSHIFT) | \ + ((vtype) << _CMD_VTYPESHIFT) | \ + ((nr) << _CMD_NSHIFT)) +#define _CMDC(dir, vtype, nr) _CMDCF(dir, 0, vtype, nr) +#define _CMDCNW(dir, vtype, nr) _CMDCF(dir, _CMD_FLAGS_NOWAIT, vtype, nr) + +/* + * Used to decode cmds.. +*/ +#define _CMD_DIR(cmd) (((cmd) >> _CMD_DIRSHIFT) & _CMD_DIRMASK) +#define _CMD_FLAGS(cmd) (((cmd) >> _CMD_FLAGSSHIFT) & _CMD_FLAGSMASK) +#define _CMD_VTYPE(cmd) (((cmd) >> _CMD_VTYPESHIFT) & _CMD_VTYPEMASK) +#define _CMD_N(cmd) (((cmd) >> _CMD_NSHIFT) & _CMD_NMASK) + +enum vnic_devcmd_cmd { + CMD_NONE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_NONE, 0), + + /* mcpu fw info in mem: (u64)a0=paddr to struct vnic_devcmd_fw_info */ + CMD_MCPU_FW_INFO = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 1), + + /* dev-specific block member: + * in: (u16)a0=offset,(u8)a1=size + * out: a0=value */ + CMD_DEV_SPEC = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 2), + + /* stats clear */ + CMD_STATS_CLEAR = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 3), + + /* stats dump in mem: (u64)a0=paddr to stats area, + * (u16)a1=sizeof stats area */ + CMD_STATS_DUMP = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 4), + + /* set Rx packet filter: (u32)a0=filters (see CMD_PFILTER_*) */ + CMD_PACKET_FILTER = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 7), + + /* hang detection notification */ + CMD_HANG_NOTIFY = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 8), + + /* MAC address in (u48)a0 */ + CMD_MAC_ADDR = _CMDC(_CMD_DIR_READ, + _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 9), + + /* disable/enable promisc mode: (u8)a0=0/1 */ +/***** XXX DEPRECATED *****/ + CMD_PROMISC_MODE = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 10), + + /* disable/enable all-multi mode: (u8)a0=0/1 */ +/***** XXX DEPRECATED *****/ + CMD_ALLMULTI_MODE = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 11), + + /* add addr from (u48)a0 */ + CMD_ADDR_ADD = _CMDCNW(_CMD_DIR_WRITE, + _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 12), + + /* del addr from (u48)a0 */ + CMD_ADDR_DEL = _CMDCNW(_CMD_DIR_WRITE, + _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 13), + + /* add VLAN id in (u16)a0 */ + CMD_VLAN_ADD = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 14), + + /* del VLAN id in (u16)a0 */ + CMD_VLAN_DEL = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 15), + + /* nic_cfg in (u32)a0 */ + CMD_NIC_CFG = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 16), + + /* union vnic_rss_key in mem: (u64)a0=paddr, (u16)a1=len */ + CMD_RSS_KEY = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 17), + + /* union vnic_rss_cpu in mem: (u64)a0=paddr, (u16)a1=len */ + CMD_RSS_CPU = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 18), + + /* initiate softreset */ + CMD_SOFT_RESET = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 19), + + /* softreset status: + * out: a0=0 reset complete, a0=1 reset in progress */ + CMD_SOFT_RESET_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 20), + + /* set struct vnic_devcmd_notify buffer in mem: + * in: + * (u64)a0=paddr to notify (set paddr=0 to unset) + * (u32)a1 & 0x00000000ffffffff=sizeof(struct vnic_devcmd_notify) + * (u16)a1 & 0x0000ffff00000000=intr num (-1 for no intr) + * out: + * (u32)a1 = effective size + */ + CMD_NOTIFY = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 21), + + /* UNDI API: (u64)a0=paddr to s_PXENV_UNDI_ struct, + * (u8)a1=PXENV_UNDI_xxx */ + CMD_UNDI = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 22), + + /* initiate open sequence (u32)a0=flags (see CMD_OPENF_*) */ + CMD_OPEN = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 23), + + /* open status: + * out: a0=0 open complete, a0=1 open in progress */ + CMD_OPEN_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 24), + + /* close vnic */ + CMD_CLOSE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 25), + + /* initialize virtual link: (u32)a0=flags (see CMD_INITF_*) */ + CMD_INIT = _CMDCNW(_CMD_DIR_READ, _CMD_VTYPE_ALL, 26), + + /* variant of CMD_INIT, with provisioning info + * (u64)a0=paddr of vnic_devcmd_provinfo + * (u32)a1=sizeof provision info */ + CMD_INIT_PROV_INFO = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 27), + + /* enable virtual link */ + CMD_ENABLE = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 28), + + /* disable virtual link */ + CMD_DISABLE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 29), + + /* stats dump all vnics on uplink in mem: (u64)a0=paddr (u32)a1=uif */ + CMD_STATS_DUMP_ALL = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 30), + + /* init status: + * out: a0=0 init complete, a0=1 init in progress + * if a0=0, a1=errno */ + CMD_INIT_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 31), + + /* INT13 API: (u64)a0=paddr to vnic_int13_params struct + * (u8)a1=INT13_CMD_xxx */ + CMD_INT13 = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_FC, 32), + + /* logical uplink enable/disable: (u64)a0: 0/1=disable/enable */ + CMD_LOGICAL_UPLINK = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 33), + + /* undo initialize of virtual link */ + CMD_DEINIT = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 34), +}; + +/* flags for CMD_OPEN */ +#define CMD_OPENF_OPROM 0x1 /* open coming from option rom */ + +/* flags for CMD_INIT */ +#define CMD_INITF_DEFAULT_MAC 0x1 /* init with default mac addr */ + +/* flags for CMD_PACKET_FILTER */ +#define CMD_PFILTER_DIRECTED 0x01 +#define CMD_PFILTER_MULTICAST 0x02 +#define CMD_PFILTER_BROADCAST 0x04 +#define CMD_PFILTER_PROMISCUOUS 0x08 +#define CMD_PFILTER_ALL_MULTICAST 0x10 + +enum vnic_devcmd_status { + STAT_NONE = 0, + STAT_BUSY = 1 << 0, /* cmd in progress */ + STAT_ERROR = 1 << 1, /* last cmd caused error (code in a0) */ +}; + +enum vnic_devcmd_error { + ERR_SUCCESS = 0, + ERR_EINVAL = 1, + ERR_EFAULT = 2, + ERR_EPERM = 3, + ERR_EBUSY = 4, + ERR_ECMDUNKNOWN = 5, + ERR_EBADSTATE = 6, + ERR_ENOMEM = 7, + ERR_ETIMEDOUT = 8, + ERR_ELINKDOWN = 9, +}; + +struct vnic_devcmd_fw_info { + char fw_version[32]; + char fw_build[32]; + char hw_version[32]; + char hw_serial_number[32]; +}; + +struct vnic_devcmd_notify { + u32 csum; /* checksum over following words */ + + u32 link_state; /* link up == 1 */ + u32 port_speed; /* effective port speed (rate limit) */ + u32 mtu; /* MTU */ + u32 msglvl; /* requested driver msg lvl */ + u32 uif; /* uplink interface */ + u32 status; /* status bits (see VNIC_STF_*) */ + u32 error; /* error code (see ERR_*) for first ERR */ + u32 link_down_cnt; /* running count of link down transitions */ +}; +#define VNIC_STF_FATAL_ERR 0x0001 /* fatal fw error */ + +struct vnic_devcmd_provinfo { + u8 oui[3]; + u8 type; + u8 data[0]; +}; + +/* + * Writing cmd register causes STAT_BUSY to get set in status register. + * When cmd completes, STAT_BUSY will be cleared. + * + * If cmd completed successfully STAT_ERROR will be clear + * and args registers contain cmd-specific results. + * + * If cmd error, STAT_ERROR will be set and args[0] contains error code. + * + * status register is read-only. While STAT_BUSY is set, + * all other register contents are read-only. + */ + +/* Make sizeof(vnic_devcmd) a power-of-2 for I/O BAR. */ +#define VNIC_DEVCMD_NARGS 15 +struct vnic_devcmd { + u32 status; /* RO */ + u32 cmd; /* RW */ + u64 args[VNIC_DEVCMD_NARGS]; /* RW cmd args (little-endian) */ +}; + +#endif /* _VNIC_DEVCMD_H_ */ diff --git a/drivers/scsi/fnic/vnic_intr.c b/drivers/scsi/fnic/vnic_intr.c new file mode 100644 index 000000000000..4f4dc8793d23 --- /dev/null +++ b/drivers/scsi/fnic/vnic_intr.c @@ -0,0 +1,60 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "vnic_dev.h" +#include "vnic_intr.h" + +void vnic_intr_free(struct vnic_intr *intr) +{ + intr->ctrl = NULL; +} + +int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, + unsigned int index) +{ + intr->index = index; + intr->vdev = vdev; + + intr->ctrl = vnic_dev_get_res(vdev, RES_TYPE_INTR_CTRL, index); + if (!intr->ctrl) { + printk(KERN_ERR "Failed to hook INTR[%d].ctrl resource\n", + index); + return -EINVAL; + } + + return 0; +} + +void vnic_intr_init(struct vnic_intr *intr, unsigned int coalescing_timer, + unsigned int coalescing_type, unsigned int mask_on_assertion) +{ + iowrite32(coalescing_timer, &intr->ctrl->coalescing_timer); + iowrite32(coalescing_type, &intr->ctrl->coalescing_type); + iowrite32(mask_on_assertion, &intr->ctrl->mask_on_assertion); + iowrite32(0, &intr->ctrl->int_credits); +} + +void vnic_intr_clean(struct vnic_intr *intr) +{ + iowrite32(0, &intr->ctrl->int_credits); +} diff --git a/drivers/scsi/fnic/vnic_intr.h b/drivers/scsi/fnic/vnic_intr.h new file mode 100644 index 000000000000..d5fb40e7c98e --- /dev/null +++ b/drivers/scsi/fnic/vnic_intr.h @@ -0,0 +1,118 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_INTR_H_ +#define _VNIC_INTR_H_ + +#include +#include "vnic_dev.h" + +/* + * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth + * Driver) when both are built with CONFIG options =y + */ +#define vnic_intr_unmask fnic_intr_unmask +#define vnic_intr_mask fnic_intr_mask +#define vnic_intr_return_credits fnic_intr_return_credits +#define vnic_intr_credits fnic_intr_credits +#define vnic_intr_return_all_credits fnic_intr_return_all_credits +#define vnic_intr_legacy_pba fnic_intr_legacy_pba +#define vnic_intr_free fnic_intr_free +#define vnic_intr_alloc fnic_intr_alloc +#define vnic_intr_init fnic_intr_init +#define vnic_intr_clean fnic_intr_clean + +#define VNIC_INTR_TIMER_MAX 0xffff + +#define VNIC_INTR_TIMER_TYPE_ABS 0 +#define VNIC_INTR_TIMER_TYPE_QUIET 1 + +/* Interrupt control */ +struct vnic_intr_ctrl { + u32 coalescing_timer; /* 0x00 */ + u32 pad0; + u32 coalescing_value; /* 0x08 */ + u32 pad1; + u32 coalescing_type; /* 0x10 */ + u32 pad2; + u32 mask_on_assertion; /* 0x18 */ + u32 pad3; + u32 mask; /* 0x20 */ + u32 pad4; + u32 int_credits; /* 0x28 */ + u32 pad5; + u32 int_credit_return; /* 0x30 */ + u32 pad6; +}; + +struct vnic_intr { + unsigned int index; + struct vnic_dev *vdev; + struct vnic_intr_ctrl __iomem *ctrl; /* memory-mapped */ +}; + +static inline void vnic_intr_unmask(struct vnic_intr *intr) +{ + iowrite32(0, &intr->ctrl->mask); +} + +static inline void vnic_intr_mask(struct vnic_intr *intr) +{ + iowrite32(1, &intr->ctrl->mask); +} + +static inline void vnic_intr_return_credits(struct vnic_intr *intr, + unsigned int credits, int unmask, int reset_timer) +{ +#define VNIC_INTR_UNMASK_SHIFT 16 +#define VNIC_INTR_RESET_TIMER_SHIFT 17 + + u32 int_credit_return = (credits & 0xffff) | + (unmask ? (1 << VNIC_INTR_UNMASK_SHIFT) : 0) | + (reset_timer ? (1 << VNIC_INTR_RESET_TIMER_SHIFT) : 0); + + iowrite32(int_credit_return, &intr->ctrl->int_credit_return); +} + +static inline unsigned int vnic_intr_credits(struct vnic_intr *intr) +{ + return ioread32(&intr->ctrl->int_credits); +} + +static inline void vnic_intr_return_all_credits(struct vnic_intr *intr) +{ + unsigned int credits = vnic_intr_credits(intr); + int unmask = 1; + int reset_timer = 1; + + vnic_intr_return_credits(intr, credits, unmask, reset_timer); +} + +static inline u32 vnic_intr_legacy_pba(u32 __iomem *legacy_pba) +{ + /* read PBA without clearing */ + return ioread32(legacy_pba); +} + +void vnic_intr_free(struct vnic_intr *intr); +int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, + unsigned int index); +void vnic_intr_init(struct vnic_intr *intr, unsigned int coalescing_timer, + unsigned int coalescing_type, unsigned int mask_on_assertion); +void vnic_intr_clean(struct vnic_intr *intr); + +#endif /* _VNIC_INTR_H_ */ diff --git a/drivers/scsi/fnic/vnic_nic.h b/drivers/scsi/fnic/vnic_nic.h new file mode 100644 index 000000000000..f15b83eeaced --- /dev/null +++ b/drivers/scsi/fnic/vnic_nic.h @@ -0,0 +1,69 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_NIC_H_ +#define _VNIC_NIC_H_ + +/* + * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth + * Driver) when both are built with CONFIG options =y + */ +#define vnic_set_nic_cfg fnic_set_nic_cfg + +#define NIC_CFG_RSS_DEFAULT_CPU_MASK_FIELD 0xffUL +#define NIC_CFG_RSS_DEFAULT_CPU_SHIFT 0 +#define NIC_CFG_RSS_HASH_TYPE (0xffUL << 8) +#define NIC_CFG_RSS_HASH_TYPE_MASK_FIELD 0xffUL +#define NIC_CFG_RSS_HASH_TYPE_SHIFT 8 +#define NIC_CFG_RSS_HASH_BITS (7UL << 16) +#define NIC_CFG_RSS_HASH_BITS_MASK_FIELD 7UL +#define NIC_CFG_RSS_HASH_BITS_SHIFT 16 +#define NIC_CFG_RSS_BASE_CPU (7UL << 19) +#define NIC_CFG_RSS_BASE_CPU_MASK_FIELD 7UL +#define NIC_CFG_RSS_BASE_CPU_SHIFT 19 +#define NIC_CFG_RSS_ENABLE (1UL << 22) +#define NIC_CFG_RSS_ENABLE_MASK_FIELD 1UL +#define NIC_CFG_RSS_ENABLE_SHIFT 22 +#define NIC_CFG_TSO_IPID_SPLIT_EN (1UL << 23) +#define NIC_CFG_TSO_IPID_SPLIT_EN_MASK_FIELD 1UL +#define NIC_CFG_TSO_IPID_SPLIT_EN_SHIFT 23 +#define NIC_CFG_IG_VLAN_STRIP_EN (1UL << 24) +#define NIC_CFG_IG_VLAN_STRIP_EN_MASK_FIELD 1UL +#define NIC_CFG_IG_VLAN_STRIP_EN_SHIFT 24 + +static inline void vnic_set_nic_cfg(u32 *nic_cfg, + u8 rss_default_cpu, u8 rss_hash_type, + u8 rss_hash_bits, u8 rss_base_cpu, + u8 rss_enable, u8 tso_ipid_split_en, + u8 ig_vlan_strip_en) +{ + *nic_cfg = (rss_default_cpu & NIC_CFG_RSS_DEFAULT_CPU_MASK_FIELD) | + ((rss_hash_type & NIC_CFG_RSS_HASH_TYPE_MASK_FIELD) + << NIC_CFG_RSS_HASH_TYPE_SHIFT) | + ((rss_hash_bits & NIC_CFG_RSS_HASH_BITS_MASK_FIELD) + << NIC_CFG_RSS_HASH_BITS_SHIFT) | + ((rss_base_cpu & NIC_CFG_RSS_BASE_CPU_MASK_FIELD) + << NIC_CFG_RSS_BASE_CPU_SHIFT) | + ((rss_enable & NIC_CFG_RSS_ENABLE_MASK_FIELD) + << NIC_CFG_RSS_ENABLE_SHIFT) | + ((tso_ipid_split_en & NIC_CFG_TSO_IPID_SPLIT_EN_MASK_FIELD) + << NIC_CFG_TSO_IPID_SPLIT_EN_SHIFT) | + ((ig_vlan_strip_en & NIC_CFG_IG_VLAN_STRIP_EN_MASK_FIELD) + << NIC_CFG_IG_VLAN_STRIP_EN_SHIFT); +} + +#endif /* _VNIC_NIC_H_ */ diff --git a/drivers/scsi/fnic/vnic_resource.h b/drivers/scsi/fnic/vnic_resource.h new file mode 100644 index 000000000000..2d842f79d41a --- /dev/null +++ b/drivers/scsi/fnic/vnic_resource.h @@ -0,0 +1,61 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_RESOURCE_H_ +#define _VNIC_RESOURCE_H_ + +#define VNIC_RES_MAGIC 0x766E6963L /* 'vnic' */ +#define VNIC_RES_VERSION 0x00000000L + +/* vNIC resource types */ +enum vnic_res_type { + RES_TYPE_EOL, /* End-of-list */ + RES_TYPE_WQ, /* Work queues */ + RES_TYPE_RQ, /* Receive queues */ + RES_TYPE_CQ, /* Completion queues */ + RES_TYPE_RSVD1, + RES_TYPE_NIC_CFG, /* Enet NIC config registers */ + RES_TYPE_RSVD2, + RES_TYPE_RSVD3, + RES_TYPE_RSVD4, + RES_TYPE_RSVD5, + RES_TYPE_INTR_CTRL, /* Interrupt ctrl table */ + RES_TYPE_INTR_TABLE, /* MSI/MSI-X Interrupt table */ + RES_TYPE_INTR_PBA, /* MSI/MSI-X PBA table */ + RES_TYPE_INTR_PBA_LEGACY, /* Legacy intr status */ + RES_TYPE_RSVD6, + RES_TYPE_RSVD7, + RES_TYPE_DEVCMD, /* Device command region */ + RES_TYPE_PASS_THRU_PAGE, /* Pass-thru page */ + + RES_TYPE_MAX, /* Count of resource types */ +}; + +struct vnic_resource_header { + u32 magic; + u32 version; +}; + +struct vnic_resource { + u8 type; + u8 bar; + u8 pad[2]; + u32 bar_offset; + u32 count; +}; + +#endif /* _VNIC_RESOURCE_H_ */ diff --git a/drivers/scsi/fnic/vnic_rq.c b/drivers/scsi/fnic/vnic_rq.c new file mode 100644 index 000000000000..bedd0d285630 --- /dev/null +++ b/drivers/scsi/fnic/vnic_rq.c @@ -0,0 +1,196 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "vnic_dev.h" +#include "vnic_rq.h" + +static int vnic_rq_alloc_bufs(struct vnic_rq *rq) +{ + struct vnic_rq_buf *buf; + struct vnic_dev *vdev; + unsigned int i, j, count = rq->ring.desc_count; + unsigned int blks = VNIC_RQ_BUF_BLKS_NEEDED(count); + + vdev = rq->vdev; + + for (i = 0; i < blks; i++) { + rq->bufs[i] = kzalloc(VNIC_RQ_BUF_BLK_SZ, GFP_ATOMIC); + if (!rq->bufs[i]) { + printk(KERN_ERR "Failed to alloc rq_bufs\n"); + return -ENOMEM; + } + } + + for (i = 0; i < blks; i++) { + buf = rq->bufs[i]; + for (j = 0; j < VNIC_RQ_BUF_BLK_ENTRIES; j++) { + buf->index = i * VNIC_RQ_BUF_BLK_ENTRIES + j; + buf->desc = (u8 *)rq->ring.descs + + rq->ring.desc_size * buf->index; + if (buf->index + 1 == count) { + buf->next = rq->bufs[0]; + break; + } else if (j + 1 == VNIC_RQ_BUF_BLK_ENTRIES) { + buf->next = rq->bufs[i + 1]; + } else { + buf->next = buf + 1; + buf++; + } + } + } + + rq->to_use = rq->to_clean = rq->bufs[0]; + rq->buf_index = 0; + + return 0; +} + +void vnic_rq_free(struct vnic_rq *rq) +{ + struct vnic_dev *vdev; + unsigned int i; + + vdev = rq->vdev; + + vnic_dev_free_desc_ring(vdev, &rq->ring); + + for (i = 0; i < VNIC_RQ_BUF_BLKS_MAX; i++) { + kfree(rq->bufs[i]); + rq->bufs[i] = NULL; + } + + rq->ctrl = NULL; +} + +int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, + unsigned int desc_count, unsigned int desc_size) +{ + int err; + + rq->index = index; + rq->vdev = vdev; + + rq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_RQ, index); + if (!rq->ctrl) { + printk(KERN_ERR "Failed to hook RQ[%d] resource\n", index); + return -EINVAL; + } + + vnic_rq_disable(rq); + + err = vnic_dev_alloc_desc_ring(vdev, &rq->ring, desc_count, desc_size); + if (err) + return err; + + err = vnic_rq_alloc_bufs(rq); + if (err) { + vnic_rq_free(rq); + return err; + } + + return 0; +} + +void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset) +{ + u64 paddr; + u32 fetch_index; + + paddr = (u64)rq->ring.base_addr | VNIC_PADDR_TARGET; + writeq(paddr, &rq->ctrl->ring_base); + iowrite32(rq->ring.desc_count, &rq->ctrl->ring_size); + iowrite32(cq_index, &rq->ctrl->cq_index); + iowrite32(error_interrupt_enable, &rq->ctrl->error_interrupt_enable); + iowrite32(error_interrupt_offset, &rq->ctrl->error_interrupt_offset); + iowrite32(0, &rq->ctrl->dropped_packet_count); + iowrite32(0, &rq->ctrl->error_status); + + /* Use current fetch_index as the ring starting point */ + fetch_index = ioread32(&rq->ctrl->fetch_index); + rq->to_use = rq->to_clean = + &rq->bufs[fetch_index / VNIC_RQ_BUF_BLK_ENTRIES] + [fetch_index % VNIC_RQ_BUF_BLK_ENTRIES]; + iowrite32(fetch_index, &rq->ctrl->posted_index); + + rq->buf_index = 0; +} + +unsigned int vnic_rq_error_status(struct vnic_rq *rq) +{ + return ioread32(&rq->ctrl->error_status); +} + +void vnic_rq_enable(struct vnic_rq *rq) +{ + iowrite32(1, &rq->ctrl->enable); +} + +int vnic_rq_disable(struct vnic_rq *rq) +{ + unsigned int wait; + + iowrite32(0, &rq->ctrl->enable); + + /* Wait for HW to ACK disable request */ + for (wait = 0; wait < 100; wait++) { + if (!(ioread32(&rq->ctrl->running))) + return 0; + udelay(1); + } + + printk(KERN_ERR "Failed to disable RQ[%d]\n", rq->index); + + return -ETIMEDOUT; +} + +void vnic_rq_clean(struct vnic_rq *rq, + void (*buf_clean)(struct vnic_rq *rq, struct vnic_rq_buf *buf)) +{ + struct vnic_rq_buf *buf; + u32 fetch_index; + + BUG_ON(ioread32(&rq->ctrl->enable)); + + buf = rq->to_clean; + + while (vnic_rq_desc_used(rq) > 0) { + + (*buf_clean)(rq, buf); + + buf = rq->to_clean = buf->next; + rq->ring.desc_avail++; + } + + /* Use current fetch_index as the ring starting point */ + fetch_index = ioread32(&rq->ctrl->fetch_index); + rq->to_use = rq->to_clean = + &rq->bufs[fetch_index / VNIC_RQ_BUF_BLK_ENTRIES] + [fetch_index % VNIC_RQ_BUF_BLK_ENTRIES]; + iowrite32(fetch_index, &rq->ctrl->posted_index); + + rq->buf_index = 0; + + vnic_dev_clear_desc_ring(&rq->ring); +} + diff --git a/drivers/scsi/fnic/vnic_rq.h b/drivers/scsi/fnic/vnic_rq.h new file mode 100644 index 000000000000..aebdfbd6ad3c --- /dev/null +++ b/drivers/scsi/fnic/vnic_rq.h @@ -0,0 +1,235 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_RQ_H_ +#define _VNIC_RQ_H_ + +#include +#include "vnic_dev.h" +#include "vnic_cq.h" + +/* + * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth + * Driver) when both are built with CONFIG options =y + */ +#define vnic_rq_desc_avail fnic_rq_desc_avail +#define vnic_rq_desc_used fnic_rq_desc_used +#define vnic_rq_next_desc fnic_rq_next_desc +#define vnic_rq_next_index fnic_rq_next_index +#define vnic_rq_next_buf_index fnic_rq_next_buf_index +#define vnic_rq_post fnic_rq_post +#define vnic_rq_posting_soon fnic_rq_posting_soon +#define vnic_rq_return_descs fnic_rq_return_descs +#define vnic_rq_service fnic_rq_service +#define vnic_rq_fill fnic_rq_fill +#define vnic_rq_free fnic_rq_free +#define vnic_rq_alloc fnic_rq_alloc +#define vnic_rq_init fnic_rq_init +#define vnic_rq_error_status fnic_rq_error_status +#define vnic_rq_enable fnic_rq_enable +#define vnic_rq_disable fnic_rq_disable +#define vnic_rq_clean fnic_rq_clean + +/* Receive queue control */ +struct vnic_rq_ctrl { + u64 ring_base; /* 0x00 */ + u32 ring_size; /* 0x08 */ + u32 pad0; + u32 posted_index; /* 0x10 */ + u32 pad1; + u32 cq_index; /* 0x18 */ + u32 pad2; + u32 enable; /* 0x20 */ + u32 pad3; + u32 running; /* 0x28 */ + u32 pad4; + u32 fetch_index; /* 0x30 */ + u32 pad5; + u32 error_interrupt_enable; /* 0x38 */ + u32 pad6; + u32 error_interrupt_offset; /* 0x40 */ + u32 pad7; + u32 error_status; /* 0x48 */ + u32 pad8; + u32 dropped_packet_count; /* 0x50 */ + u32 pad9; + u32 dropped_packet_count_rc; /* 0x58 */ + u32 pad10; +}; + +/* Break the vnic_rq_buf allocations into blocks of 64 entries */ +#define VNIC_RQ_BUF_BLK_ENTRIES 64 +#define VNIC_RQ_BUF_BLK_SZ \ + (VNIC_RQ_BUF_BLK_ENTRIES * sizeof(struct vnic_rq_buf)) +#define VNIC_RQ_BUF_BLKS_NEEDED(entries) \ + DIV_ROUND_UP(entries, VNIC_RQ_BUF_BLK_ENTRIES) +#define VNIC_RQ_BUF_BLKS_MAX VNIC_RQ_BUF_BLKS_NEEDED(4096) + +struct vnic_rq_buf { + struct vnic_rq_buf *next; + dma_addr_t dma_addr; + void *os_buf; + unsigned int os_buf_index; + unsigned int len; + unsigned int index; + void *desc; +}; + +struct vnic_rq { + unsigned int index; + struct vnic_dev *vdev; + struct vnic_rq_ctrl __iomem *ctrl; /* memory-mapped */ + struct vnic_dev_ring ring; + struct vnic_rq_buf *bufs[VNIC_RQ_BUF_BLKS_MAX]; + struct vnic_rq_buf *to_use; + struct vnic_rq_buf *to_clean; + void *os_buf_head; + unsigned int buf_index; + unsigned int pkts_outstanding; +}; + +static inline unsigned int vnic_rq_desc_avail(struct vnic_rq *rq) +{ + /* how many does SW own? */ + return rq->ring.desc_avail; +} + +static inline unsigned int vnic_rq_desc_used(struct vnic_rq *rq) +{ + /* how many does HW own? */ + return rq->ring.desc_count - rq->ring.desc_avail - 1; +} + +static inline void *vnic_rq_next_desc(struct vnic_rq *rq) +{ + return rq->to_use->desc; +} + +static inline unsigned int vnic_rq_next_index(struct vnic_rq *rq) +{ + return rq->to_use->index; +} + +static inline unsigned int vnic_rq_next_buf_index(struct vnic_rq *rq) +{ + return rq->buf_index++; +} + +static inline void vnic_rq_post(struct vnic_rq *rq, + void *os_buf, unsigned int os_buf_index, + dma_addr_t dma_addr, unsigned int len) +{ + struct vnic_rq_buf *buf = rq->to_use; + + buf->os_buf = os_buf; + buf->os_buf_index = os_buf_index; + buf->dma_addr = dma_addr; + buf->len = len; + + buf = buf->next; + rq->to_use = buf; + rq->ring.desc_avail--; + + /* Move the posted_index every nth descriptor + */ + +#ifndef VNIC_RQ_RETURN_RATE +#define VNIC_RQ_RETURN_RATE 0xf /* keep 2^n - 1 */ +#endif + + if ((buf->index & VNIC_RQ_RETURN_RATE) == 0) { + /* Adding write memory barrier prevents compiler and/or CPU + * reordering, thus avoiding descriptor posting before + * descriptor is initialized. Otherwise, hardware can read + * stale descriptor fields. + */ + wmb(); + iowrite32(buf->index, &rq->ctrl->posted_index); + } +} + +static inline int vnic_rq_posting_soon(struct vnic_rq *rq) +{ + return (rq->to_use->index & VNIC_RQ_RETURN_RATE) == 0; +} + +static inline void vnic_rq_return_descs(struct vnic_rq *rq, unsigned int count) +{ + rq->ring.desc_avail += count; +} + +enum desc_return_options { + VNIC_RQ_RETURN_DESC, + VNIC_RQ_DEFER_RETURN_DESC, +}; + +static inline void vnic_rq_service(struct vnic_rq *rq, + struct cq_desc *cq_desc, u16 completed_index, + int desc_return, void (*buf_service)(struct vnic_rq *rq, + struct cq_desc *cq_desc, struct vnic_rq_buf *buf, + int skipped, void *opaque), void *opaque) +{ + struct vnic_rq_buf *buf; + int skipped; + + buf = rq->to_clean; + while (1) { + + skipped = (buf->index != completed_index); + + (*buf_service)(rq, cq_desc, buf, skipped, opaque); + + if (desc_return == VNIC_RQ_RETURN_DESC) + rq->ring.desc_avail++; + + rq->to_clean = buf->next; + + if (!skipped) + break; + + buf = rq->to_clean; + } +} + +static inline int vnic_rq_fill(struct vnic_rq *rq, + int (*buf_fill)(struct vnic_rq *rq)) +{ + int err; + + while (vnic_rq_desc_avail(rq) > 1) { + + err = (*buf_fill)(rq); + if (err) + return err; + } + + return 0; +} + +void vnic_rq_free(struct vnic_rq *rq); +int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, + unsigned int desc_count, unsigned int desc_size); +void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset); +unsigned int vnic_rq_error_status(struct vnic_rq *rq); +void vnic_rq_enable(struct vnic_rq *rq); +int vnic_rq_disable(struct vnic_rq *rq); +void vnic_rq_clean(struct vnic_rq *rq, + void (*buf_clean)(struct vnic_rq *rq, struct vnic_rq_buf *buf)); + +#endif /* _VNIC_RQ_H_ */ diff --git a/drivers/scsi/fnic/vnic_scsi.h b/drivers/scsi/fnic/vnic_scsi.h new file mode 100644 index 000000000000..46baa5254001 --- /dev/null +++ b/drivers/scsi/fnic/vnic_scsi.h @@ -0,0 +1,99 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_SCSI_H_ +#define _VNIC_SCSI_H_ + +#define VNIC_FNIC_WQ_COPY_COUNT_MIN 1 +#define VNIC_FNIC_WQ_COPY_COUNT_MAX 1 + +#define VNIC_FNIC_WQ_DESCS_MIN 64 +#define VNIC_FNIC_WQ_DESCS_MAX 128 + +#define VNIC_FNIC_WQ_COPY_DESCS_MIN 64 +#define VNIC_FNIC_WQ_COPY_DESCS_MAX 512 + +#define VNIC_FNIC_RQ_DESCS_MIN 64 +#define VNIC_FNIC_RQ_DESCS_MAX 128 + +#define VNIC_FNIC_EDTOV_MIN 1000 +#define VNIC_FNIC_EDTOV_MAX 255000 +#define VNIC_FNIC_EDTOV_DEF 2000 + +#define VNIC_FNIC_RATOV_MIN 1000 +#define VNIC_FNIC_RATOV_MAX 255000 + +#define VNIC_FNIC_MAXDATAFIELDSIZE_MIN 256 +#define VNIC_FNIC_MAXDATAFIELDSIZE_MAX 2112 + +#define VNIC_FNIC_FLOGI_RETRIES_MIN 0 +#define VNIC_FNIC_FLOGI_RETRIES_MAX 0xffffffff +#define VNIC_FNIC_FLOGI_RETRIES_DEF 0xffffffff + +#define VNIC_FNIC_FLOGI_TIMEOUT_MIN 1000 +#define VNIC_FNIC_FLOGI_TIMEOUT_MAX 255000 + +#define VNIC_FNIC_PLOGI_RETRIES_MIN 0 +#define VNIC_FNIC_PLOGI_RETRIES_MAX 255 +#define VNIC_FNIC_PLOGI_RETRIES_DEF 8 + +#define VNIC_FNIC_PLOGI_TIMEOUT_MIN 1000 +#define VNIC_FNIC_PLOGI_TIMEOUT_MAX 255000 + +#define VNIC_FNIC_IO_THROTTLE_COUNT_MIN 256 +#define VNIC_FNIC_IO_THROTTLE_COUNT_MAX 4096 + +#define VNIC_FNIC_LINK_DOWN_TIMEOUT_MIN 0 +#define VNIC_FNIC_LINK_DOWN_TIMEOUT_MAX 240000 + +#define VNIC_FNIC_PORT_DOWN_TIMEOUT_MIN 0 +#define VNIC_FNIC_PORT_DOWN_TIMEOUT_MAX 240000 + +#define VNIC_FNIC_PORT_DOWN_IO_RETRIES_MIN 0 +#define VNIC_FNIC_PORT_DOWN_IO_RETRIES_MAX 255 + +#define VNIC_FNIC_LUNS_PER_TARGET_MIN 1 +#define VNIC_FNIC_LUNS_PER_TARGET_MAX 1024 + +/* Device-specific region: scsi configuration */ +struct vnic_fc_config { + u64 node_wwn; + u64 port_wwn; + u32 flags; + u32 wq_enet_desc_count; + u32 wq_copy_desc_count; + u32 rq_desc_count; + u32 flogi_retries; + u32 flogi_timeout; + u32 plogi_retries; + u32 plogi_timeout; + u32 io_throttle_count; + u32 link_down_timeout; + u32 port_down_timeout; + u32 port_down_io_retries; + u32 luns_per_tgt; + u16 maxdatafieldsize; + u16 ed_tov; + u16 ra_tov; + u16 intr_timer; + u8 intr_timer_type; +}; + +#define VFCF_FCP_SEQ_LVL_ERR 0x1 /* Enable FCP-2 Error Recovery */ +#define VFCF_PERBI 0x2 /* persistent binding info available */ + +#endif /* _VNIC_SCSI_H_ */ diff --git a/drivers/scsi/fnic/vnic_stats.h b/drivers/scsi/fnic/vnic_stats.h new file mode 100644 index 000000000000..5372e23c1cb3 --- /dev/null +++ b/drivers/scsi/fnic/vnic_stats.h @@ -0,0 +1,68 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_STATS_H_ +#define _VNIC_STATS_H_ + +/* Tx statistics */ +struct vnic_tx_stats { + u64 tx_frames_ok; + u64 tx_unicast_frames_ok; + u64 tx_multicast_frames_ok; + u64 tx_broadcast_frames_ok; + u64 tx_bytes_ok; + u64 tx_unicast_bytes_ok; + u64 tx_multicast_bytes_ok; + u64 tx_broadcast_bytes_ok; + u64 tx_drops; + u64 tx_errors; + u64 tx_tso; + u64 rsvd[16]; +}; + +/* Rx statistics */ +struct vnic_rx_stats { + u64 rx_frames_ok; + u64 rx_frames_total; + u64 rx_unicast_frames_ok; + u64 rx_multicast_frames_ok; + u64 rx_broadcast_frames_ok; + u64 rx_bytes_ok; + u64 rx_unicast_bytes_ok; + u64 rx_multicast_bytes_ok; + u64 rx_broadcast_bytes_ok; + u64 rx_drop; + u64 rx_no_bufs; + u64 rx_errors; + u64 rx_rss; + u64 rx_crc_errors; + u64 rx_frames_64; + u64 rx_frames_127; + u64 rx_frames_255; + u64 rx_frames_511; + u64 rx_frames_1023; + u64 rx_frames_1518; + u64 rx_frames_to_max; + u64 rsvd[16]; +}; + +struct vnic_stats { + struct vnic_tx_stats tx; + struct vnic_rx_stats rx; +}; + +#endif /* _VNIC_STATS_H_ */ diff --git a/drivers/scsi/fnic/vnic_wq.c b/drivers/scsi/fnic/vnic_wq.c new file mode 100644 index 000000000000..1f9ea790d130 --- /dev/null +++ b/drivers/scsi/fnic/vnic_wq.c @@ -0,0 +1,182 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "vnic_dev.h" +#include "vnic_wq.h" + +static int vnic_wq_alloc_bufs(struct vnic_wq *wq) +{ + struct vnic_wq_buf *buf; + struct vnic_dev *vdev; + unsigned int i, j, count = wq->ring.desc_count; + unsigned int blks = VNIC_WQ_BUF_BLKS_NEEDED(count); + + vdev = wq->vdev; + + for (i = 0; i < blks; i++) { + wq->bufs[i] = kzalloc(VNIC_WQ_BUF_BLK_SZ, GFP_ATOMIC); + if (!wq->bufs[i]) { + printk(KERN_ERR "Failed to alloc wq_bufs\n"); + return -ENOMEM; + } + } + + for (i = 0; i < blks; i++) { + buf = wq->bufs[i]; + for (j = 0; j < VNIC_WQ_BUF_BLK_ENTRIES; j++) { + buf->index = i * VNIC_WQ_BUF_BLK_ENTRIES + j; + buf->desc = (u8 *)wq->ring.descs + + wq->ring.desc_size * buf->index; + if (buf->index + 1 == count) { + buf->next = wq->bufs[0]; + break; + } else if (j + 1 == VNIC_WQ_BUF_BLK_ENTRIES) { + buf->next = wq->bufs[i + 1]; + } else { + buf->next = buf + 1; + buf++; + } + } + } + + wq->to_use = wq->to_clean = wq->bufs[0]; + + return 0; +} + +void vnic_wq_free(struct vnic_wq *wq) +{ + struct vnic_dev *vdev; + unsigned int i; + + vdev = wq->vdev; + + vnic_dev_free_desc_ring(vdev, &wq->ring); + + for (i = 0; i < VNIC_WQ_BUF_BLKS_MAX; i++) { + kfree(wq->bufs[i]); + wq->bufs[i] = NULL; + } + + wq->ctrl = NULL; + +} + +int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, + unsigned int desc_count, unsigned int desc_size) +{ + int err; + + wq->index = index; + wq->vdev = vdev; + + wq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_WQ, index); + if (!wq->ctrl) { + printk(KERN_ERR "Failed to hook WQ[%d] resource\n", index); + return -EINVAL; + } + + vnic_wq_disable(wq); + + err = vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count, desc_size); + if (err) + return err; + + err = vnic_wq_alloc_bufs(wq); + if (err) { + vnic_wq_free(wq); + return err; + } + + return 0; +} + +void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset) +{ + u64 paddr; + + paddr = (u64)wq->ring.base_addr | VNIC_PADDR_TARGET; + writeq(paddr, &wq->ctrl->ring_base); + iowrite32(wq->ring.desc_count, &wq->ctrl->ring_size); + iowrite32(0, &wq->ctrl->fetch_index); + iowrite32(0, &wq->ctrl->posted_index); + iowrite32(cq_index, &wq->ctrl->cq_index); + iowrite32(error_interrupt_enable, &wq->ctrl->error_interrupt_enable); + iowrite32(error_interrupt_offset, &wq->ctrl->error_interrupt_offset); + iowrite32(0, &wq->ctrl->error_status); +} + +unsigned int vnic_wq_error_status(struct vnic_wq *wq) +{ + return ioread32(&wq->ctrl->error_status); +} + +void vnic_wq_enable(struct vnic_wq *wq) +{ + iowrite32(1, &wq->ctrl->enable); +} + +int vnic_wq_disable(struct vnic_wq *wq) +{ + unsigned int wait; + + iowrite32(0, &wq->ctrl->enable); + + /* Wait for HW to ACK disable request */ + for (wait = 0; wait < 100; wait++) { + if (!(ioread32(&wq->ctrl->running))) + return 0; + udelay(1); + } + + printk(KERN_ERR "Failed to disable WQ[%d]\n", wq->index); + + return -ETIMEDOUT; +} + +void vnic_wq_clean(struct vnic_wq *wq, + void (*buf_clean)(struct vnic_wq *wq, struct vnic_wq_buf *buf)) +{ + struct vnic_wq_buf *buf; + + BUG_ON(ioread32(&wq->ctrl->enable)); + + buf = wq->to_clean; + + while (vnic_wq_desc_used(wq) > 0) { + + (*buf_clean)(wq, buf); + + buf = wq->to_clean = buf->next; + wq->ring.desc_avail++; + } + + wq->to_use = wq->to_clean = wq->bufs[0]; + + iowrite32(0, &wq->ctrl->fetch_index); + iowrite32(0, &wq->ctrl->posted_index); + iowrite32(0, &wq->ctrl->error_status); + + vnic_dev_clear_desc_ring(&wq->ring); +} diff --git a/drivers/scsi/fnic/vnic_wq.h b/drivers/scsi/fnic/vnic_wq.h new file mode 100644 index 000000000000..5cd094f79281 --- /dev/null +++ b/drivers/scsi/fnic/vnic_wq.h @@ -0,0 +1,175 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_WQ_H_ +#define _VNIC_WQ_H_ + +#include +#include "vnic_dev.h" +#include "vnic_cq.h" + +/* + * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth + * Driver) when both are built with CONFIG options =y + */ +#define vnic_wq_desc_avail fnic_wq_desc_avail +#define vnic_wq_desc_used fnic_wq_desc_used +#define vnic_wq_next_desc fni_cwq_next_desc +#define vnic_wq_post fnic_wq_post +#define vnic_wq_service fnic_wq_service +#define vnic_wq_free fnic_wq_free +#define vnic_wq_alloc fnic_wq_alloc +#define vnic_wq_init fnic_wq_init +#define vnic_wq_error_status fnic_wq_error_status +#define vnic_wq_enable fnic_wq_enable +#define vnic_wq_disable fnic_wq_disable +#define vnic_wq_clean fnic_wq_clean + +/* Work queue control */ +struct vnic_wq_ctrl { + u64 ring_base; /* 0x00 */ + u32 ring_size; /* 0x08 */ + u32 pad0; + u32 posted_index; /* 0x10 */ + u32 pad1; + u32 cq_index; /* 0x18 */ + u32 pad2; + u32 enable; /* 0x20 */ + u32 pad3; + u32 running; /* 0x28 */ + u32 pad4; + u32 fetch_index; /* 0x30 */ + u32 pad5; + u32 dca_value; /* 0x38 */ + u32 pad6; + u32 error_interrupt_enable; /* 0x40 */ + u32 pad7; + u32 error_interrupt_offset; /* 0x48 */ + u32 pad8; + u32 error_status; /* 0x50 */ + u32 pad9; +}; + +struct vnic_wq_buf { + struct vnic_wq_buf *next; + dma_addr_t dma_addr; + void *os_buf; + unsigned int len; + unsigned int index; + int sop; + void *desc; +}; + +/* Break the vnic_wq_buf allocations into blocks of 64 entries */ +#define VNIC_WQ_BUF_BLK_ENTRIES 64 +#define VNIC_WQ_BUF_BLK_SZ \ + (VNIC_WQ_BUF_BLK_ENTRIES * sizeof(struct vnic_wq_buf)) +#define VNIC_WQ_BUF_BLKS_NEEDED(entries) \ + DIV_ROUND_UP(entries, VNIC_WQ_BUF_BLK_ENTRIES) +#define VNIC_WQ_BUF_BLKS_MAX VNIC_WQ_BUF_BLKS_NEEDED(4096) + +struct vnic_wq { + unsigned int index; + struct vnic_dev *vdev; + struct vnic_wq_ctrl __iomem *ctrl; /* memory-mapped */ + struct vnic_dev_ring ring; + struct vnic_wq_buf *bufs[VNIC_WQ_BUF_BLKS_MAX]; + struct vnic_wq_buf *to_use; + struct vnic_wq_buf *to_clean; + unsigned int pkts_outstanding; +}; + +static inline unsigned int vnic_wq_desc_avail(struct vnic_wq *wq) +{ + /* how many does SW own? */ + return wq->ring.desc_avail; +} + +static inline unsigned int vnic_wq_desc_used(struct vnic_wq *wq) +{ + /* how many does HW own? */ + return wq->ring.desc_count - wq->ring.desc_avail - 1; +} + +static inline void *vnic_wq_next_desc(struct vnic_wq *wq) +{ + return wq->to_use->desc; +} + +static inline void vnic_wq_post(struct vnic_wq *wq, + void *os_buf, dma_addr_t dma_addr, + unsigned int len, int sop, int eop) +{ + struct vnic_wq_buf *buf = wq->to_use; + + buf->sop = sop; + buf->os_buf = eop ? os_buf : NULL; + buf->dma_addr = dma_addr; + buf->len = len; + + buf = buf->next; + if (eop) { + /* Adding write memory barrier prevents compiler and/or CPU + * reordering, thus avoiding descriptor posting before + * descriptor is initialized. Otherwise, hardware can read + * stale descriptor fields. + */ + wmb(); + iowrite32(buf->index, &wq->ctrl->posted_index); + } + wq->to_use = buf; + + wq->ring.desc_avail--; +} + +static inline void vnic_wq_service(struct vnic_wq *wq, + struct cq_desc *cq_desc, u16 completed_index, + void (*buf_service)(struct vnic_wq *wq, + struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque), + void *opaque) +{ + struct vnic_wq_buf *buf; + + buf = wq->to_clean; + while (1) { + + (*buf_service)(wq, cq_desc, buf, opaque); + + wq->ring.desc_avail++; + + wq->to_clean = buf->next; + + if (buf->index == completed_index) + break; + + buf = wq->to_clean; + } +} + +void vnic_wq_free(struct vnic_wq *wq); +int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, + unsigned int desc_count, unsigned int desc_size); +void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset); +unsigned int vnic_wq_error_status(struct vnic_wq *wq); +void vnic_wq_enable(struct vnic_wq *wq); +int vnic_wq_disable(struct vnic_wq *wq); +void vnic_wq_clean(struct vnic_wq *wq, + void (*buf_clean)(struct vnic_wq *wq, struct vnic_wq_buf *buf)); + +#endif /* _VNIC_WQ_H_ */ diff --git a/drivers/scsi/fnic/vnic_wq_copy.c b/drivers/scsi/fnic/vnic_wq_copy.c new file mode 100644 index 000000000000..9eab7e7caf38 --- /dev/null +++ b/drivers/scsi/fnic/vnic_wq_copy.c @@ -0,0 +1,117 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "vnic_wq_copy.h" + +void vnic_wq_copy_enable(struct vnic_wq_copy *wq) +{ + iowrite32(1, &wq->ctrl->enable); +} + +int vnic_wq_copy_disable(struct vnic_wq_copy *wq) +{ + unsigned int wait; + + iowrite32(0, &wq->ctrl->enable); + + /* Wait for HW to ACK disable request */ + for (wait = 0; wait < 100; wait++) { + if (!(ioread32(&wq->ctrl->running))) + return 0; + udelay(1); + } + + printk(KERN_ERR "Failed to disable Copy WQ[%d]," + " fetch index=%d, posted_index=%d\n", + wq->index, ioread32(&wq->ctrl->fetch_index), + ioread32(&wq->ctrl->posted_index)); + + return -ENODEV; +} + +void vnic_wq_copy_clean(struct vnic_wq_copy *wq, + void (*q_clean)(struct vnic_wq_copy *wq, + struct fcpio_host_req *wq_desc)) +{ + BUG_ON(ioread32(&wq->ctrl->enable)); + + if (vnic_wq_copy_desc_in_use(wq)) + vnic_wq_copy_service(wq, -1, q_clean); + + wq->to_use_index = wq->to_clean_index = 0; + + iowrite32(0, &wq->ctrl->fetch_index); + iowrite32(0, &wq->ctrl->posted_index); + iowrite32(0, &wq->ctrl->error_status); + + vnic_dev_clear_desc_ring(&wq->ring); +} + +void vnic_wq_copy_free(struct vnic_wq_copy *wq) +{ + struct vnic_dev *vdev; + + vdev = wq->vdev; + vnic_dev_free_desc_ring(vdev, &wq->ring); + wq->ctrl = NULL; +} + +int vnic_wq_copy_alloc(struct vnic_dev *vdev, struct vnic_wq_copy *wq, + unsigned int index, unsigned int desc_count, + unsigned int desc_size) +{ + int err; + + wq->index = index; + wq->vdev = vdev; + wq->to_use_index = wq->to_clean_index = 0; + wq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_WQ, index); + if (!wq->ctrl) { + printk(KERN_ERR "Failed to hook COPY WQ[%d] resource\n", index); + return -EINVAL; + } + + vnic_wq_copy_disable(wq); + + err = vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count, desc_size); + if (err) + return err; + + return 0; +} + +void vnic_wq_copy_init(struct vnic_wq_copy *wq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset) +{ + u64 paddr; + + paddr = (u64)wq->ring.base_addr | VNIC_PADDR_TARGET; + writeq(paddr, &wq->ctrl->ring_base); + iowrite32(wq->ring.desc_count, &wq->ctrl->ring_size); + iowrite32(0, &wq->ctrl->fetch_index); + iowrite32(0, &wq->ctrl->posted_index); + iowrite32(cq_index, &wq->ctrl->cq_index); + iowrite32(error_interrupt_enable, &wq->ctrl->error_interrupt_enable); + iowrite32(error_interrupt_offset, &wq->ctrl->error_interrupt_offset); +} + diff --git a/drivers/scsi/fnic/vnic_wq_copy.h b/drivers/scsi/fnic/vnic_wq_copy.h new file mode 100644 index 000000000000..6aff9740c3df --- /dev/null +++ b/drivers/scsi/fnic/vnic_wq_copy.h @@ -0,0 +1,128 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_WQ_COPY_H_ +#define _VNIC_WQ_COPY_H_ + +#include +#include "vnic_wq.h" +#include "fcpio.h" + +#define VNIC_WQ_COPY_MAX 1 + +struct vnic_wq_copy { + unsigned int index; + struct vnic_dev *vdev; + struct vnic_wq_ctrl __iomem *ctrl; /* memory-mapped */ + struct vnic_dev_ring ring; + unsigned to_use_index; + unsigned to_clean_index; +}; + +static inline unsigned int vnic_wq_copy_desc_avail(struct vnic_wq_copy *wq) +{ + return wq->ring.desc_avail; +} + +static inline unsigned int vnic_wq_copy_desc_in_use(struct vnic_wq_copy *wq) +{ + return wq->ring.desc_count - 1 - wq->ring.desc_avail; +} + +static inline void *vnic_wq_copy_next_desc(struct vnic_wq_copy *wq) +{ + struct fcpio_host_req *desc = wq->ring.descs; + return &desc[wq->to_use_index]; +} + +static inline void vnic_wq_copy_post(struct vnic_wq_copy *wq) +{ + + ((wq->to_use_index + 1) == wq->ring.desc_count) ? + (wq->to_use_index = 0) : (wq->to_use_index++); + wq->ring.desc_avail--; + + /* Adding write memory barrier prevents compiler and/or CPU + * reordering, thus avoiding descriptor posting before + * descriptor is initialized. Otherwise, hardware can read + * stale descriptor fields. + */ + wmb(); + + iowrite32(wq->to_use_index, &wq->ctrl->posted_index); +} + +static inline void vnic_wq_copy_desc_process(struct vnic_wq_copy *wq, u16 index) +{ + unsigned int cnt; + + if (wq->to_clean_index <= index) + cnt = (index - wq->to_clean_index) + 1; + else + cnt = wq->ring.desc_count - wq->to_clean_index + index + 1; + + wq->to_clean_index = ((index + 1) % wq->ring.desc_count); + wq->ring.desc_avail += cnt; + +} + +static inline void vnic_wq_copy_service(struct vnic_wq_copy *wq, + u16 completed_index, + void (*q_service)(struct vnic_wq_copy *wq, + struct fcpio_host_req *wq_desc)) +{ + struct fcpio_host_req *wq_desc = wq->ring.descs; + unsigned int curr_index; + + while (1) { + + if (q_service) + (*q_service)(wq, &wq_desc[wq->to_clean_index]); + + wq->ring.desc_avail++; + + curr_index = wq->to_clean_index; + + /* increment the to-clean index so that we start + * with an unprocessed index next time we enter the loop + */ + ((wq->to_clean_index + 1) == wq->ring.desc_count) ? + (wq->to_clean_index = 0) : (wq->to_clean_index++); + + if (curr_index == completed_index) + break; + + /* we have cleaned all the entries */ + if ((completed_index == (u16)-1) && + (wq->to_clean_index == wq->to_use_index)) + break; + } +} + +void vnic_wq_copy_enable(struct vnic_wq_copy *wq); +int vnic_wq_copy_disable(struct vnic_wq_copy *wq); +void vnic_wq_copy_free(struct vnic_wq_copy *wq); +int vnic_wq_copy_alloc(struct vnic_dev *vdev, struct vnic_wq_copy *wq, + unsigned int index, unsigned int desc_count, unsigned int desc_size); +void vnic_wq_copy_init(struct vnic_wq_copy *wq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset); +void vnic_wq_copy_clean(struct vnic_wq_copy *wq, + void (*q_clean)(struct vnic_wq_copy *wq, + struct fcpio_host_req *wq_desc)); + +#endif /* _VNIC_WQ_COPY_H_ */ diff --git a/drivers/scsi/fnic/wq_enet_desc.h b/drivers/scsi/fnic/wq_enet_desc.h new file mode 100644 index 000000000000..b121cbad18b8 --- /dev/null +++ b/drivers/scsi/fnic/wq_enet_desc.h @@ -0,0 +1,96 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _WQ_ENET_DESC_H_ +#define _WQ_ENET_DESC_H_ + +/* Ethernet work queue descriptor: 16B */ +struct wq_enet_desc { + __le64 address; + __le16 length; + __le16 mss_loopback; + __le16 header_length_flags; + __le16 vlan_tag; +}; + +#define WQ_ENET_ADDR_BITS 64 +#define WQ_ENET_LEN_BITS 14 +#define WQ_ENET_LEN_MASK ((1 << WQ_ENET_LEN_BITS) - 1) +#define WQ_ENET_MSS_BITS 14 +#define WQ_ENET_MSS_MASK ((1 << WQ_ENET_MSS_BITS) - 1) +#define WQ_ENET_MSS_SHIFT 2 +#define WQ_ENET_LOOPBACK_SHIFT 1 +#define WQ_ENET_HDRLEN_BITS 10 +#define WQ_ENET_HDRLEN_MASK ((1 << WQ_ENET_HDRLEN_BITS) - 1) +#define WQ_ENET_FLAGS_OM_BITS 2 +#define WQ_ENET_FLAGS_OM_MASK ((1 << WQ_ENET_FLAGS_OM_BITS) - 1) +#define WQ_ENET_FLAGS_EOP_SHIFT 12 +#define WQ_ENET_FLAGS_CQ_ENTRY_SHIFT 13 +#define WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT 14 +#define WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT 15 + +#define WQ_ENET_OFFLOAD_MODE_CSUM 0 +#define WQ_ENET_OFFLOAD_MODE_RESERVED 1 +#define WQ_ENET_OFFLOAD_MODE_CSUM_L4 2 +#define WQ_ENET_OFFLOAD_MODE_TSO 3 + +static inline void wq_enet_desc_enc(struct wq_enet_desc *desc, + u64 address, u16 length, u16 mss, u16 header_length, + u8 offload_mode, u8 eop, u8 cq_entry, u8 fcoe_encap, + u8 vlan_tag_insert, u16 vlan_tag, u8 loopback) +{ + desc->address = cpu_to_le64(address); + desc->length = cpu_to_le16(length & WQ_ENET_LEN_MASK); + desc->mss_loopback = cpu_to_le16((mss & WQ_ENET_MSS_MASK) << + WQ_ENET_MSS_SHIFT | (loopback & 1) << WQ_ENET_LOOPBACK_SHIFT); + desc->header_length_flags = cpu_to_le16( + (header_length & WQ_ENET_HDRLEN_MASK) | + (offload_mode & WQ_ENET_FLAGS_OM_MASK) << WQ_ENET_HDRLEN_BITS | + (eop & 1) << WQ_ENET_FLAGS_EOP_SHIFT | + (cq_entry & 1) << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT | + (fcoe_encap & 1) << WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT | + (vlan_tag_insert & 1) << WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT); + desc->vlan_tag = cpu_to_le16(vlan_tag); +} + +static inline void wq_enet_desc_dec(struct wq_enet_desc *desc, + u64 *address, u16 *length, u16 *mss, u16 *header_length, + u8 *offload_mode, u8 *eop, u8 *cq_entry, u8 *fcoe_encap, + u8 *vlan_tag_insert, u16 *vlan_tag, u8 *loopback) +{ + *address = le64_to_cpu(desc->address); + *length = le16_to_cpu(desc->length) & WQ_ENET_LEN_MASK; + *mss = (le16_to_cpu(desc->mss_loopback) >> WQ_ENET_MSS_SHIFT) & + WQ_ENET_MSS_MASK; + *loopback = (u8)((le16_to_cpu(desc->mss_loopback) >> + WQ_ENET_LOOPBACK_SHIFT) & 1); + *header_length = le16_to_cpu(desc->header_length_flags) & + WQ_ENET_HDRLEN_MASK; + *offload_mode = (u8)((le16_to_cpu(desc->header_length_flags) >> + WQ_ENET_HDRLEN_BITS) & WQ_ENET_FLAGS_OM_MASK); + *eop = (u8)((le16_to_cpu(desc->header_length_flags) >> + WQ_ENET_FLAGS_EOP_SHIFT) & 1); + *cq_entry = (u8)((le16_to_cpu(desc->header_length_flags) >> + WQ_ENET_FLAGS_CQ_ENTRY_SHIFT) & 1); + *fcoe_encap = (u8)((le16_to_cpu(desc->header_length_flags) >> + WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT) & 1); + *vlan_tag_insert = (u8)((le16_to_cpu(desc->header_length_flags) >> + WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT) & 1); + *vlan_tag = le16_to_cpu(desc->vlan_tag); +} + +#endif /* _WQ_ENET_DESC_H_ */ From c53a284f8be23735dc6b53929640a987055f2933 Mon Sep 17 00:00:00 2001 From: Edward Goggin Date: Thu, 9 Apr 2009 10:02:22 -0700 Subject: [PATCH 03/91] [SCSI] initialize max_target_blocked in scsi_alloc_target This patch initializes the max_target_blocked field of a scsi target structure so that a queuecommand return value of SCSI_MLQUEUE_TARGET_BUSY will actually result in having the scsi_queue_insert blocking the device queue before requeuing the command and running the queue. Otherwise, can and does cause livelock on single CPU configurations if/when open-iSCSI software initiator's command PDU window fills. Signed-off-by: Ed Goggin Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 6f51ca485f35..e2b50d8f57a8 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -425,6 +425,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, INIT_LIST_HEAD(&starget->devices); starget->state = STARGET_CREATED; starget->scsi_level = SCSI_2; + starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED; retry: spin_lock_irqsave(shost->host_lock, flags); From 9a1a69a1f41cbefebf3172761f197db6aba71e68 Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Wed, 29 Apr 2009 13:12:39 -0500 Subject: [PATCH 04/91] [SCSI] fc-transport: Close state transition-window during rport deletion. Andrew Vasquez wrote: > fc-transport: Close state transition-window during rport deletion. > > After an rport's state has transitioned to FC_PORTSTATE_BLOCKED, > but, prior to making the upcall to 'block' the scsi-target > associated with an rport, queued commands can recycle and > ultimately run out of retries causing failures to propagate to > upper-level drivers. Close this transition-window by returning > the non-'retries' modifying DID_IMM_RETRY status for submitted > I/Os. The same can happen for iscsi when transitioning from logged in to failed and blocking the sdevs. This patch converts iscsi and fc's transitions back to use DID_IMM_RETRY instead of DID_TRANSPORT_DISRUPTED which has a limited number of retries that we do not want to use for handling this race. Signed-off-by: Andrew Vasquez [Addition of iscsi and fc port online devloss case conversion by Mike Christie] Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_iscsi.c | 2 +- include/scsi/scsi_transport_fc.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 094795455293..0a2ce7b6325c 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -357,7 +357,7 @@ int iscsi_session_chkready(struct iscsi_cls_session *session) err = 0; break; case ISCSI_SESSION_FAILED: - err = DID_TRANSPORT_DISRUPTED << 16; + err = DID_IMM_RETRY << 16; break; case ISCSI_SESSION_FREE: err = DID_TRANSPORT_FAILFAST << 16; diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index c9184f756cad..68a8d873bbd9 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -680,7 +680,7 @@ fc_remote_port_chkready(struct fc_rport *rport) if (rport->roles & FC_PORT_ROLE_FCP_TARGET) result = 0; else if (rport->flags & FC_RPORT_DEVLOSS_PENDING) - result = DID_TRANSPORT_DISRUPTED << 16; + result = DID_IMM_RETRY << 16; else result = DID_NO_CONNECT << 16; break; @@ -688,7 +688,7 @@ fc_remote_port_chkready(struct fc_rport *rport) if (rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT) result = DID_TRANSPORT_FAILFAST << 16; else - result = DID_TRANSPORT_DISRUPTED << 16; + result = DID_IMM_RETRY << 16; break; default: result = DID_NO_CONNECT << 16; From 5e43754fd949193252ecb470d7fb08b547a1e310 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 30 Apr 2009 19:13:41 -0700 Subject: [PATCH 05/91] [SCSI] ses: fix problems caused by empty SES provided name We use the name provided by SES to name objects. An empty name is legal in SES but causes problems in our generic device hierarchy. Fix this by falling back to a number if the name is either NULL or empty. Also fix a secondary bug spotted in that dev_set_name(dev, name) uses a string format and so would go wrong if name contained a '%'. Signed-off-by: Yinghai Lu Signed-off-by: James Bottomley --- drivers/misc/enclosure.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index 3cf61ece71d7..348443bdb23b 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -119,7 +119,7 @@ enclosure_register(struct device *dev, const char *name, int components, edev->edev.class = &enclosure_class; edev->edev.parent = get_device(dev); edev->cb = cb; - dev_set_name(&edev->edev, name); + dev_set_name(&edev->edev, "%s", name); err = device_register(&edev->edev); if (err) goto err; @@ -255,8 +255,8 @@ enclosure_component_register(struct enclosure_device *edev, ecomp->number = number; cdev = &ecomp->cdev; cdev->parent = get_device(&edev->edev); - if (name) - dev_set_name(cdev, name); + if (name && name[0]) + dev_set_name(cdev, "%s", name); else dev_set_name(cdev, "%u", number); From 8454e9888cb0316dd296fd5d47c612248ed5e1d1 Mon Sep 17 00:00:00 2001 From: adam radford Date: Tue, 5 May 2009 11:45:37 -0700 Subject: [PATCH 06/91] [SCSI] 3w-9xxx: scsi_dma_unmap fix This patch fixes the following regression the occurred during the scsi_dma_map()/unmap() changes: 3w-9xxx 0001:45:00.0: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=36 bytes] Signed-off-by: Adam Radford Signed-off-by: James Bottomley --- drivers/scsi/3w-9xxx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 8b7983aba8f7..36c21b19e5d7 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -1978,7 +1978,8 @@ static void twa_unmap_scsi_data(TW_Device_Extension *tw_dev, int request_id) { struct scsi_cmnd *cmd = tw_dev->srb[request_id]; - scsi_dma_unmap(cmd); + if (cmd->SCp.phase == TW_PHASE_SGLIST) + scsi_dma_unmap(cmd); } /* End twa_unmap_scsi_data() */ /* scsi_host_template initializer */ From 7b14f58ad65f9d74e4273fb45360cfea824495aa Mon Sep 17 00:00:00 2001 From: adam radford Date: Mon, 11 May 2009 14:55:55 -0700 Subject: [PATCH 07/91] [SCSI] 3w-xxxx: scsi_dma_unmap fix This patch fixes the following regression that occurred during the scsi_dma_map()/unmap() changes when compiling with CONFIG_DMA_API_DEBUG=y : WARNING: at lib/dma-debug.c:496 check_unmap+0x142/0x542() Hardware name: 3w-xxxx 0000:02:02.0: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=36 bytes] Signed-off-by: Adam Radford Signed-off-by: James Bottomley --- drivers/scsi/3w-xxxx.c | 5 +++-- drivers/scsi/3w-xxxx.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index c03f1d2c9e2e..faa0fcfed71e 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -6,7 +6,7 @@ Arnaldo Carvalho de Melo Brad Strand - Copyright (C) 1999-2007 3ware Inc. + Copyright (C) 1999-2009 3ware Inc. Kernel compatiblity By: Andre Hedrick Non-Copyright (C) 2000 Andre Hedrick @@ -1294,7 +1294,8 @@ static void tw_unmap_scsi_data(struct pci_dev *pdev, struct scsi_cmnd *cmd) { dprintk(KERN_WARNING "3w-xxxx: tw_unmap_scsi_data()\n"); - scsi_dma_unmap(cmd); + if (cmd->SCp.phase == TW_PHASE_SGLIST) + scsi_dma_unmap(cmd); } /* End tw_unmap_scsi_data() */ /* This function will reset a device extension */ diff --git a/drivers/scsi/3w-xxxx.h b/drivers/scsi/3w-xxxx.h index 8e71e5e122b3..a5a2ba2561d9 100644 --- a/drivers/scsi/3w-xxxx.h +++ b/drivers/scsi/3w-xxxx.h @@ -6,7 +6,7 @@ Arnaldo Carvalho de Melo Brad Strand - Copyright (C) 1999-2007 3ware Inc. + Copyright (C) 1999-2009 3ware Inc. Kernel compatiblity By: Andre Hedrick Non-Copyright (C) 2000 Andre Hedrick From 2b69a8a2b6e5f5d26a038a6494a88a1a776ac88f Mon Sep 17 00:00:00 2001 From: Eric Moore Date: Mon, 18 May 2009 12:57:24 -0600 Subject: [PATCH 08/91] [SCSI] mpt2sas: fix driver version inconsistency In Commit commit 3b8b5c9b1f08660583e5dfe095c24170df62f1d2 Author: Eric Moore Date: Tue Apr 21 15:44:27 2009 -0600 [SCSI] mpt2sas : bump driver version to 01.100.02.00 The MPT2SAS_MAJOR_VERSION didn't get bumped from 00 to 01 so applications will see it incorrectly as 00.100.02.00 driver instead of 01.100.02.00. Fix by making MPT2SAS_MAJOR_VERSION match the major number in MPT2SAS_DRIVER_VERSION Signed-off-by: Eric Moore Signed-off-by: James Bottomley --- drivers/scsi/mpt2sas/mpt2sas_base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index babd4cc0cb25..36b1d1052ba1 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -69,7 +69,7 @@ #define MPT2SAS_AUTHOR "LSI Corporation " #define MPT2SAS_DESCRIPTION "LSI MPT Fusion SAS 2.0 Device Driver" #define MPT2SAS_DRIVER_VERSION "01.100.02.00" -#define MPT2SAS_MAJOR_VERSION 00 +#define MPT2SAS_MAJOR_VERSION 01 #define MPT2SAS_MINOR_VERSION 100 #define MPT2SAS_BUILD_VERSION 02 #define MPT2SAS_RELEASE_VERSION 00 From 8e7d2b2c6ecd3c21a54b877eae3d5be48292e6b5 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 8 May 2009 16:13:25 -0700 Subject: [PATCH 09/91] drm/i915: allocate large pointer arrays with vmalloc For awhile now, many of the GEM code paths have allocated page or object arrays with the slab allocator. This is nice and fast, but won't work well if memory is fragmented, since the slab allocator works with physically contiguous memory (i.e. order > 2 allocations are likely to fail fairly early after booting and doing some work). This patch works around the issue by falling back to vmalloc for >PAGE_SIZE allocations. This is ugly, but much less work than chaining a bunch of pages together by hand (suprisingly there's not a bunch of generic kernel helpers for this yet afaik). vmalloc space is somewhat precious on 32 bit kernels, but our allocations shouldn't be big enough to cause problems, though they're routinely more than a page. Note that this patch doesn't address the unchecked alloc-based-on-ioctl-args in GEM; that needs to be fixed in a separate patch. Also, I've deliberately ignored the DRM's "area" junk. I don't think anyone actually uses it anymore and I'm hoping it gets ripped out soon. [Updated: removed size arg to new free function. We could unify the free functions as well once the DRM mem tracking is ripped out.] fd.o bug #20152 (part 1/3) Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 38 +++++++++++++-------------------- include/drm/drmP.h | 24 +++++++++++++++++++++ 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b189b49c7602..4a24c90fb940 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -349,7 +349,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; num_pages = last_data_page - first_data_page + 1; - user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + user_pages = drm_calloc_large(num_pages, sizeof(struct page *)); if (user_pages == NULL) return -ENOMEM; @@ -429,7 +429,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, SetPageDirty(user_pages[i]); page_cache_release(user_pages[i]); } - kfree(user_pages); + drm_free_large(user_pages); return ret; } @@ -649,7 +649,7 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; num_pages = last_data_page - first_data_page + 1; - user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + user_pages = drm_calloc_large(num_pages, sizeof(struct page *)); if (user_pages == NULL) return -ENOMEM; @@ -719,7 +719,7 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, out_unpin_pages: for (i = 0; i < pinned_pages; i++) page_cache_release(user_pages[i]); - kfree(user_pages); + drm_free_large(user_pages); return ret; } @@ -824,7 +824,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; num_pages = last_data_page - first_data_page + 1; - user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + user_pages = drm_calloc_large(num_pages, sizeof(struct page *)); if (user_pages == NULL) return -ENOMEM; @@ -902,7 +902,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, fail_put_user_pages: for (i = 0; i < pinned_pages; i++) page_cache_release(user_pages[i]); - kfree(user_pages); + drm_free_large(user_pages); return ret; } @@ -1408,9 +1408,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) } obj_priv->dirty = 0; - drm_free(obj_priv->pages, - page_count * sizeof(struct page *), - DRM_MEM_DRIVER); + drm_free_large(obj_priv->pages); obj_priv->pages = NULL; } @@ -2024,8 +2022,7 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) */ page_count = obj->size / PAGE_SIZE; BUG_ON(obj_priv->pages != NULL); - obj_priv->pages = drm_calloc(page_count, sizeof(struct page *), - DRM_MEM_DRIVER); + obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); if (obj_priv->pages == NULL) { DRM_ERROR("Faled to allocate page list\n"); obj_priv->pages_refcount--; @@ -3111,7 +3108,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, reloc_count += exec_list[i].relocation_count; } - *relocs = drm_calloc(reloc_count, sizeof(**relocs), DRM_MEM_DRIVER); + *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); if (*relocs == NULL) return -ENOMEM; @@ -3125,8 +3122,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, exec_list[i].relocation_count * sizeof(**relocs)); if (ret != 0) { - drm_free(*relocs, reloc_count * sizeof(**relocs), - DRM_MEM_DRIVER); + drm_free_large(*relocs); *relocs = NULL; return -EFAULT; } @@ -3165,7 +3161,7 @@ i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, } err: - drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER); + drm_free_large(relocs); return ret; } @@ -3198,10 +3194,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } /* Copy in the exec list from userland */ - exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count, - DRM_MEM_DRIVER); - object_list = drm_calloc(sizeof(*object_list), args->buffer_count, - DRM_MEM_DRIVER); + exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count); + object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count); if (exec_list == NULL || object_list == NULL) { DRM_ERROR("Failed to allocate exec or object list " "for %d buffers\n", @@ -3462,10 +3456,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, } pre_mutex_err: - drm_free(object_list, sizeof(*object_list) * args->buffer_count, - DRM_MEM_DRIVER); - drm_free(exec_list, sizeof(*exec_list) * args->buffer_count, - DRM_MEM_DRIVER); + drm_free_large(object_list); + drm_free_large(exec_list); drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects, DRM_MEM_DRIVER); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index c8c422151431..b84d8ae35e6f 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1519,6 +1519,30 @@ static __inline__ void *drm_calloc(size_t nmemb, size_t size, int area) { return kcalloc(nmemb, size, GFP_KERNEL); } + +static __inline__ void *drm_calloc_large(size_t nmemb, size_t size) +{ + u8 *addr; + + if (size <= PAGE_SIZE) + return kcalloc(nmemb, size, GFP_KERNEL); + + addr = vmalloc(nmemb * size); + if (!addr) + return NULL; + + memset(addr, 0, nmemb * size); + + return addr; +} + +static __inline void drm_free_large(void *ptr) +{ + if (!is_vmalloc_addr(ptr)) + return kfree(ptr); + + vfree(ptr); +} #else extern void *drm_alloc(size_t size, int area); extern void drm_free(void *pt, size_t size, int area); From 2b611cb6eed04062d0a9861c82248e02c844ba3f Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Fri, 27 Mar 2009 17:47:27 -0400 Subject: [PATCH 10/91] ath5k: fix scanning in AR2424 AR5K_PHY_PLL_40MHZ_5413 should not be ORed with AR5K_PHY_MODE_RAD_RF5112 for 5 GHz channels. The incorrect PLL value breaks scanning in the countries where 5 GHz channels are allowed. Signed-off-by: Pavel Roskin Acked-by: Nick Kossifidis Signed-off-by: John W. Linville --- drivers/net/wireless/ath5k/reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath5k/reset.c b/drivers/net/wireless/ath5k/reset.c index 7a17d31b2fd9..faede828c8ff 100644 --- a/drivers/net/wireless/ath5k/reset.c +++ b/drivers/net/wireless/ath5k/reset.c @@ -359,7 +359,7 @@ int ath5k_hw_nic_wakeup(struct ath5k_hw *ah, int flags, bool initial) mode |= AR5K_PHY_MODE_FREQ_5GHZ; if (ah->ah_radio == AR5K_RF5413) - clock |= AR5K_PHY_PLL_40MHZ_5413; + clock = AR5K_PHY_PLL_40MHZ_5413; else clock |= AR5K_PHY_PLL_40MHZ; From 88f16db7a2fa63b9242e8a0fbc40d51722f2e2f9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 May 2009 12:04:30 +0200 Subject: [PATCH 11/91] wext: verify buffer size for SIOCSIWENCODEEXT Another design flaw in wireless extensions (is anybody surprised?) in the way it handles the iw_encode_ext structure: The structure is part of the 'extra' memory but contains the key length explicitly, instead of it just being the length of the extra buffer - size of the struct and using the explicit key length only for the get operation (which only writes it). Therefore, we have this layout: extra: +-------------------------+ | struct iw_encode_ext { | | ... | | u16 key_len; | | u8 key[0]; | | }; | +-------------------------+ | key material | +-------------------------+ Now, all drivers I checked use ext->key_len without checking that both key_len and the struct fit into the extra buffer that has been copied from userspace. This leads to a buffer overrun while reading that buffer, depending on the driver it may be possible to specify arbitrary key_len or it may need to be a proper length for the key algorithm specified. Thankfully, this is only exploitable by root, but root can actually cause a segfault or use kernel memory as a key (which you can even get back with siocgiwencode or siocgiwencodeext from the key buffer). Fix this by verifying that key_len fits into the buffer along with struct iw_encode_ext. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/wext.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/wireless/wext.c b/net/wireless/wext.c index cb6a5bb85d80..0e59f9ae9b81 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -786,6 +786,13 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, err = -EFAULT; goto out; } + + if (cmd == SIOCSIWENCODEEXT) { + struct iw_encode_ext *ee = (void *) extra; + + if (iwp->length < sizeof(*ee) + ee->key_len) + return -EFAULT; + } } err = handler(dev, info, (union iwreq_data *) iwp, extra); From a54be5d43aa2d6febc5a4f8dd3b87b9429b60437 Mon Sep 17 00:00:00 2001 From: Forrest Zhang Date: Wed, 13 May 2009 11:14:39 -0400 Subject: [PATCH 12/91] ath5k: fix exp off-by-one when computing OFDM delta slope Commit e8f055f0c3b ("ath5k: Update reset code") subtly changed the code that computes floating point values for the PHY3_TIMING register such that the exponent is off by a decimal point, which can cause problems with OFDM channel operation. get_bitmask_order() actually returns the highest bit set plus one, whereas the previous code wanted the highest bit set. Instead, use ilog2 which is what this code is really calculating. Also check coef_scaled to handle the (invalid) case where we need log2(0). Signed-off-by: Bob Copeland Signed-off-by: John W. Linville --- drivers/net/wireless/ath5k/reset.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath5k/reset.c b/drivers/net/wireless/ath5k/reset.c index faede828c8ff..5f72c111c2e8 100644 --- a/drivers/net/wireless/ath5k/reset.c +++ b/drivers/net/wireless/ath5k/reset.c @@ -26,7 +26,7 @@ \*****************************/ #include /* To determine if a card is pci-e */ -#include /* For get_bitmask_order */ +#include #include "ath5k.h" #include "reg.h" #include "base.h" @@ -69,10 +69,10 @@ static inline int ath5k_hw_write_ofdm_timings(struct ath5k_hw *ah, /* Get exponent * ALGO: coef_exp = 14 - highest set bit position */ - coef_exp = get_bitmask_order(coef_scaled); + coef_exp = ilog2(coef_scaled); /* Doesn't make sense if it's zero*/ - if (!coef_exp) + if (!coef_scaled || !coef_exp) return -EINVAL; /* Note: we've shifted coef_scaled by 24 */ From fbc9f97bbf5e1eaee562eba93dc60faaff3f3bfa Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Fri, 15 May 2009 16:13:46 -0700 Subject: [PATCH 13/91] iwlwifi: do not cancel delayed work inside spin_lock_irqsave Calling cancel_delayed_work() from inside spin_lock_irqsave, introduces a potential deadlock. As explained by Johannes Berg A - lock T - timer phase CPU 1 CPU 2 --------------------------------------------- some place that calls cancel_timer_sync() (which is the | code) lock-irq(A) | "lock-irq"(T) | "unlock"(T) | wait(T) unlock(A) timer softirq "lock"(T) run(T) "unlock"(T) irq handler lock(A) unlock(A) Now all that again, interleaved, leading to deadlock: lock-irq(A) "lock"(T) run(T) IRQ during or maybe before run(T) --> lock(A) "lock-irq"(T) wait(T) We fix this by moving the call to cancel_delayed_work() into workqueue. There are cases where the work may not actually be queued or running at the time we are trying to cancel it, but cancel_delayed_work() is able to deal with this. Also cleanup iwl_set_mode related to this call. This function (iwl_set_mode) is only called when bringing interface up and there will thus not be any scanning done. No need to try to cancel scanning. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=13224, which was also reported at http://marc.info/?l=linux-wireless&m=124081921903223&w=2 . Tested-by: Miles Lane Signed-off-by: Reinette Chatre Acked-by: Zhu Yi Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-agn.c | 7 ------- drivers/net/wireless/iwlwifi/iwl-scan.c | 7 ++++--- drivers/net/wireless/iwlwifi/iwl3945-base.c | 9 ++------- 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 3bb28db4a40f..f46ba2475776 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -669,13 +669,6 @@ static int iwl_set_mode(struct iwl_priv *priv, int mode) if (!iwl_is_ready_rf(priv)) return -EAGAIN; - cancel_delayed_work(&priv->scan_check); - if (iwl_scan_cancel_timeout(priv, 100)) { - IWL_WARN(priv, "Aborted scan still in progress after 100ms\n"); - IWL_DEBUG_MAC80211(priv, "leaving - scan abort failed.\n"); - return -EAGAIN; - } - iwl_commit_rxon(priv); return 0; diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c index e7c65c4f741b..6330b91e37ce 100644 --- a/drivers/net/wireless/iwlwifi/iwl-scan.c +++ b/drivers/net/wireless/iwlwifi/iwl-scan.c @@ -227,9 +227,6 @@ static void iwl_rx_scan_complete_notif(struct iwl_priv *priv, /* The HW is no longer scanning */ clear_bit(STATUS_SCAN_HW, &priv->status); - /* The scan completion notification came in, so kill that timer... */ - cancel_delayed_work(&priv->scan_check); - IWL_DEBUG_INFO(priv, "Scan pass on %sGHz took %dms\n", (priv->scan_bands & BIT(IEEE80211_BAND_2GHZ)) ? "2.4" : "5.2", @@ -712,6 +709,8 @@ static void iwl_bg_request_scan(struct work_struct *data) mutex_lock(&priv->mutex); + cancel_delayed_work(&priv->scan_check); + if (!iwl_is_ready(priv)) { IWL_WARN(priv, "request scan called when driver not ready.\n"); goto done; @@ -925,6 +924,8 @@ void iwl_bg_scan_completed(struct work_struct *work) IWL_DEBUG_SCAN(priv, "SCAN complete scan\n"); + cancel_delayed_work(&priv->scan_check); + ieee80211_scan_completed(priv->hw, false); if (test_bit(STATUS_EXIT_PENDING, &priv->status)) diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 4cce66133500..ff4d0e41d7c4 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -782,13 +782,6 @@ static int iwl3945_set_mode(struct iwl_priv *priv, int mode) if (!iwl_is_ready_rf(priv)) return -EAGAIN; - cancel_delayed_work(&priv->scan_check); - if (iwl_scan_cancel_timeout(priv, 100)) { - IWL_WARN(priv, "Aborted scan still in progress after 100ms\n"); - IWL_DEBUG_MAC80211(priv, "leaving - scan abort failed.\n"); - return -EAGAIN; - } - iwl3945_commit_rxon(priv); return 0; @@ -3298,6 +3291,8 @@ static void iwl3945_bg_request_scan(struct work_struct *data) mutex_lock(&priv->mutex); + cancel_delayed_work(&priv->scan_check); + if (!iwl_is_ready(priv)) { IWL_WARN(priv, "request scan called when driver not ready.\n"); goto done; From 875690c378d64d9ee2de15cad8206d3f11ae5096 Mon Sep 17 00:00:00 2001 From: Fabio Rossi Date: Wed, 1 Apr 2009 20:37:50 +0200 Subject: [PATCH 14/91] ath5k: fix interpolation with equal power levels When the EEPROM contains weird values for the power levels we have to fix the interpolation process. Signed-off-by: Fabio Rossi Acked-by: Nick Kossifidis Signed-off-by: John W. Linville --- drivers/net/wireless/ath5k/phy.c | 43 +++++++++++++++++++------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/ath5k/phy.c b/drivers/net/wireless/ath5k/phy.c index 9e2faae5ae94..b48b29dca3d2 100644 --- a/drivers/net/wireless/ath5k/phy.c +++ b/drivers/net/wireless/ath5k/phy.c @@ -1487,28 +1487,35 @@ ath5k_get_linear_pcdac_min(const u8 *stepL, const u8 *stepR, { s8 tmp; s16 min_pwrL, min_pwrR; - s16 pwr_i = pwrL[0]; + s16 pwr_i; - do { - pwr_i--; - tmp = (s8) ath5k_get_interpolated_value(pwr_i, - pwrL[0], pwrL[1], - stepL[0], stepL[1]); + if (pwrL[0] == pwrL[1]) + min_pwrL = pwrL[0]; + else { + pwr_i = pwrL[0]; + do { + pwr_i--; + tmp = (s8) ath5k_get_interpolated_value(pwr_i, + pwrL[0], pwrL[1], + stepL[0], stepL[1]); + } while (tmp > 1); - } while (tmp > 1); + min_pwrL = pwr_i; + } - min_pwrL = pwr_i; + if (pwrR[0] == pwrR[1]) + min_pwrR = pwrR[0]; + else { + pwr_i = pwrR[0]; + do { + pwr_i--; + tmp = (s8) ath5k_get_interpolated_value(pwr_i, + pwrR[0], pwrR[1], + stepR[0], stepR[1]); + } while (tmp > 1); - pwr_i = pwrR[0]; - do { - pwr_i--; - tmp = (s8) ath5k_get_interpolated_value(pwr_i, - pwrR[0], pwrR[1], - stepR[0], stepR[1]); - - } while (tmp > 1); - - min_pwrR = pwr_i; + min_pwrR = pwr_i; + } /* Keep the right boundary so that it works for both curves */ return max(min_pwrL, min_pwrR); From 267d493b322b05984048aef8ea9b5b213490bbe0 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 20 May 2009 10:51:41 -0400 Subject: [PATCH 15/91] airo: fix airo_get_encode{,ext} buffer overflow like I mean it... "airo: airo_get_encode{,ext} potential buffer overflow" was actually a no-op, due to an unrecognized type overflow in an assignment. Oddly, gcc only seems to tell me about it when using -Wextra...grrr... Signed-off-by: John W. Linville --- drivers/net/wireless/airo.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index d73475739127..9eabf4d1f2e7 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -6467,6 +6467,7 @@ static int airo_get_encode(struct net_device *dev, { struct airo_info *local = dev->ml_priv; int index = (dwrq->flags & IW_ENCODE_INDEX) - 1; + int wep_key_len; u8 buf[16]; if (!local->wep_capable) @@ -6500,11 +6501,13 @@ static int airo_get_encode(struct net_device *dev, dwrq->flags |= index + 1; /* Copy the key to the user buffer */ - dwrq->length = get_wep_key(local, index, &buf[0], sizeof(buf)); - if (dwrq->length != -1) - memcpy(extra, buf, dwrq->length); - else + wep_key_len = get_wep_key(local, index, &buf[0], sizeof(buf)); + if (wep_key_len < 0) { dwrq->length = 0; + } else { + dwrq->length = wep_key_len; + memcpy(extra, buf, dwrq->length); + } return 0; } @@ -6617,7 +6620,7 @@ static int airo_get_encodeext(struct net_device *dev, struct airo_info *local = dev->ml_priv; struct iw_point *encoding = &wrqu->encoding; struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; - int idx, max_key_len; + int idx, max_key_len, wep_key_len; u8 buf[16]; if (!local->wep_capable) @@ -6661,11 +6664,13 @@ static int airo_get_encodeext(struct net_device *dev, memset(extra, 0, 16); /* Copy the key to the user buffer */ - ext->key_len = get_wep_key(local, idx, &buf[0], sizeof(buf)); - if (ext->key_len != -1) - memcpy(extra, buf, ext->key_len); - else + wep_key_len = get_wep_key(local, idx, &buf[0], sizeof(buf)); + if (wep_key_len < 0) { ext->key_len = 0; + } else { + ext->key_len = wep_key_len; + memcpy(extra, buf, ext->key_len); + } return 0; } From 5078b2e32ad4b1f753b1c837c15892202f753c97 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 13 May 2009 17:04:42 -0400 Subject: [PATCH 16/91] cfg80211: fix race between core hint and driver's custom apply Its possible for cfg80211 to have scheduled the work and for the global workqueue to not have kicked in prior to a cfg80211 driver's regulatory hint or wiphy_apply_custom_regulatory(). Although this is very unlikely its possible and should fix this race. When this race would happen you are expected to have hit a null pointer dereference panic. Cc: stable@kernel.org Signed-off-by: Luis R. Rodriguez Tested-by: Alan Jenkins Signed-off-by: John W. Linville --- net/wireless/reg.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 08265ca15785..487cb627ddba 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1551,6 +1551,13 @@ static int regulatory_hint_core(const char *alpha2) queue_regulatory_request(request); + /* + * This ensures last_request is populated once modules + * come swinging in and calling regulatory hints and + * wiphy_apply_custom_regulatory(). + */ + flush_scheduled_work(); + return 0; } From c9d2fbf36df5e04efa226614093bb1bacc6fe131 Mon Sep 17 00:00:00 2001 From: Jay Sternberg Date: Tue, 19 May 2009 14:56:36 -0700 Subject: [PATCH 17/91] iwlwifi: update 5000 ucode support to version 2 of API enable iwl driver to support 5000 ucode having version 2 of API Signed-off-by: Jay Sternberg Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-5000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c index e5ca2511a81a..9452461ce864 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000.c +++ b/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -46,7 +46,7 @@ #include "iwl-6000-hw.h" /* Highest firmware API version supported */ -#define IWL5000_UCODE_API_MAX 1 +#define IWL5000_UCODE_API_MAX 2 #define IWL5150_UCODE_API_MAX 2 /* Lowest firmware API version supported */ From a6c67339784db5763d6f20ae1881aeebe8c5a9f4 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Wed, 20 May 2009 02:12:56 +0200 Subject: [PATCH 18/91] wireless: beyond ARRAY_SIZE of intf->crypto_stats Do not go beyond ARRAY_SIZE of intf->crypto_stats Signed-off-by: Roel Kluin Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00debug.c b/drivers/net/wireless/rt2x00/rt2x00debug.c index 07d378ef0b46..7b3ee8c2eaef 100644 --- a/drivers/net/wireless/rt2x00/rt2x00debug.c +++ b/drivers/net/wireless/rt2x00/rt2x00debug.c @@ -138,7 +138,7 @@ void rt2x00debug_update_crypto(struct rt2x00_dev *rt2x00dev, if (cipher == CIPHER_TKIP_NO_MIC) cipher = CIPHER_TKIP; - if (cipher == CIPHER_NONE || cipher > CIPHER_MAX) + if (cipher == CIPHER_NONE || cipher >= CIPHER_MAX) return; /* Remove CIPHER_NONE index */ From cf8da764fc6959b7efb482f375dfef9830e98205 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 May 2009 18:54:22 +0000 Subject: [PATCH 19/91] net: fix length computation in rt_check_expire() rt_check_expire() computes average and standard deviation of chain lengths, but not correclty reset length to 0 at beginning of each chain. This probably gives overflows for sum2 (and sum) on loaded machines instead of meaningful results. Signed-off-by: Eric Dumazet Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv4/route.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c4c60e9f068a..869cf1c44b78 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -785,7 +785,7 @@ static void rt_check_expire(void) static unsigned int rover; unsigned int i = rover, goal; struct rtable *rth, **rthp; - unsigned long length = 0, samples = 0; + unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; u64 mult; @@ -795,9 +795,9 @@ static void rt_check_expire(void) goal = (unsigned int)mult; if (goal > rt_hash_mask) goal = rt_hash_mask + 1; - length = 0; for (; goal > 0; goal--) { unsigned long tmo = ip_rt_gc_timeout; + unsigned long length; i = (i + 1) & rt_hash_mask; rthp = &rt_hash_table[i].chain; @@ -809,6 +809,7 @@ static void rt_check_expire(void) if (*rthp == NULL) continue; + length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { if (rt_is_expired(rth)) { From 1ddbcb005c395518c2cd0df504cff3d4b5c85853 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 May 2009 20:14:28 +0000 Subject: [PATCH 20/91] net: fix rtable leak in net/ipv4/route.c Alexander V. Lukyanov found a regression in 2.6.29 and made a complete analysis found in http://bugzilla.kernel.org/show_bug.cgi?id=13339 Quoted here because its a perfect one : begin_of_quotation 2.6.29 patch has introduced flexible route cache rebuilding. Unfortunately the patch has at least one critical flaw, and another problem. rt_intern_hash calculates rthi pointer, which is later used for new entry insertion. The same loop calculates cand pointer which is used to clean the list. If the pointers are the same, rtable leak occurs, as first the cand is removed then the new entry is appended to it. This leak leads to unregister_netdevice problem (usage count > 0). Another problem of the patch is that it tries to insert the entries in certain order, to facilitate counting of entries distinct by all but QoS parameters. Unfortunately, referencing an existing rtable entry moves it to list beginning, to speed up further lookups, so the carefully built order is destroyed. For the first problem the simplest patch it to set rthi=0 when rthi==cand, but it will also destroy the ordering. end_of_quotation Problematic commit is 1080d709fb9d8cd4392f93476ee46a9d6ea05a5b (net: implement emergency route cache rebulds when gc_elasticity is exceeded) Trying to keep dst_entries ordered is too complex and breaks the fact that order should depend on the frequency of use for garbage collection. A possible fix is to make rt_intern_hash() simpler, and only makes rt_check_expire() a litle bit smarter, being able to cope with an arbitrary entries order. The added loop is running on cache hot data, while cpu is prefetching next object, so should be unnoticied. Reported-and-analyzed-by: Alexander V. Lukyanov Signed-off-by: Eric Dumazet Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv4/route.c | 55 +++++++++++++++--------------------------------- 1 file changed, 17 insertions(+), 38 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 869cf1c44b78..28205e5bfa9b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -784,7 +784,7 @@ static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, **rthp; + struct rtable *rth, *aux, **rthp; unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; u64 mult; @@ -812,6 +812,7 @@ static void rt_check_expire(void) length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { + prefetch(rth->u.dst.rt_next); if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); @@ -820,33 +821,30 @@ static void rt_check_expire(void) if (rth->u.dst.expires) { /* Entry is expired even if it is in use */ if (time_before_eq(jiffies, rth->u.dst.expires)) { +nofree: tmo >>= 1; rthp = &rth->u.dst.rt_next; /* - * Only bump our length if the hash - * inputs on entries n and n+1 are not - * the same, we only count entries on + * We only count entries on * a chain with equal hash inputs once * so that entries for different QOS * levels, and other non-hash input * attributes don't unfairly skew * the length computation */ - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; + for (aux = rt_hash_table[i].chain;;) { + if (aux == rth) { + length += ONE; + break; + } + if (compare_hash_inputs(&aux->fl, &rth->fl)) + break; + aux = aux->u.dst.rt_next; + } continue; } - } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { - tmo >>= 1; - rthp = &rth->u.dst.rt_next; - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; - continue; - } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) + goto nofree; /* Cleanup aged off entries. */ *rthp = rth->u.dst.rt_next; @@ -1069,7 +1067,6 @@ out: return 0; static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) { struct rtable *rth, **rthp; - struct rtable *rthi; unsigned long now; struct rtable *cand, **candp; u32 min_score; @@ -1089,7 +1086,6 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) } rthp = &rt_hash_table[hash].chain; - rthi = NULL; spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { @@ -1135,17 +1131,6 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) chain_length++; rthp = &rth->u.dst.rt_next; - - /* - * check to see if the next entry in the chain - * contains the same hash input values as rt. If it does - * This is where we will insert into the list, instead of - * at the head. This groups entries that differ by aspects not - * relvant to the hash function together, which we use to adjust - * our chain length - */ - if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) - rthi = rth; } if (cand) { @@ -1206,10 +1191,7 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) } } - if (rthi) - rt->u.dst.rt_next = rthi->u.dst.rt_next; - else - rt->u.dst.rt_next = rt_hash_table[hash].chain; + rt->u.dst.rt_next = rt_hash_table[hash].chain; #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { @@ -1225,10 +1207,7 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) * previous writes to rt are comitted to memory * before making rt visible to other CPUS. */ - if (rthi) - rcu_assign_pointer(rthi->u.dst.rt_next, rt); - else - rcu_assign_pointer(rt_hash_table[hash].chain, rt); + rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); *rp = rt; From 4f72427998b105392e60bae7a6798a0c96fe4f0a Mon Sep 17 00:00:00 2001 From: Jean-Mickael Guerin Date: Wed, 20 May 2009 17:38:59 -0700 Subject: [PATCH 21/91] IPv6: set RTPROT_KERNEL to initial route The use of unspecified protocol in IPv6 initial route prevents quagga to install IPv6 default route: # show ipv6 route S ::/0 [1/0] via fe80::1, eth1_0 K>* ::/0 is directly connected, lo, rej C>* ::1/128 is directly connected, lo C>* fe80::/64 is directly connected, eth1_0 # ip -6 route fe80::/64 dev eth1_0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit -1 ff00::/8 dev eth1_0 metric 256 mtu 1500 advmss 1440 hoplimit -1 unreachable default dev lo proto none metric -1 error -101 hoplimit 255 The attached patch ensures RTPROT_KERNEL to the default initial route and fixes the problem for quagga. This is similar to "ipv6: protocol for address routes" f410a1fba7afa79d2992620e874a343fdba28332. # show ipv6 route S>* ::/0 [1/0] via fe80::1, eth1_0 C>* ::1/128 is directly connected, lo C>* fe80::/64 is directly connected, eth1_0 # ip -6 route fe80::/64 dev eth1_0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit -1 fe80::/64 dev eth1_0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit -1 ff00::/8 dev eth1_0 metric 256 mtu 1500 advmss 1440 hoplimit -1 default via fe80::1 dev eth1_0 proto zebra metric 1024 mtu 1500 advmss 1440 hoplimit -1 unreachable default dev lo proto kernel metric -1 error -101 hoplimit 255 Signed-off-by: Jean-Mickael Guerin Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1394ddb6e35c..032a5ec391c5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -137,6 +137,7 @@ static struct rt6_info ip6_null_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -159,6 +160,7 @@ static struct rt6_info ip6_prohibit_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -176,6 +178,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; From c40499e04b2005e61f989824251f9343b55f96bb Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Thu, 21 May 2009 15:04:15 -0700 Subject: [PATCH 22/91] gigaset: beyond ARRAY_SIZE of iwb->data Signed-off-by: Roel Kluin Signed-off-by: David S. Miller --- drivers/isdn/gigaset/isocdata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/gigaset/isocdata.c b/drivers/isdn/gigaset/isocdata.c index b171e75cb52e..29808c4fb1cb 100644 --- a/drivers/isdn/gigaset/isocdata.c +++ b/drivers/isdn/gigaset/isocdata.c @@ -175,7 +175,7 @@ int gigaset_isowbuf_getbytes(struct isowbuf_t *iwb, int size) return -EINVAL; } src = iwb->read; - if (unlikely(limit > BAS_OUTBUFSIZE + BAS_OUTBUFPAD || + if (unlikely(limit >= BAS_OUTBUFSIZE + BAS_OUTBUFPAD || (read < src && limit >= src))) { pr_err("isoc write buffer frame reservation violated\n"); return -EFAULT; From 5b5f792a6a9a2f9ae812d151ed621f72e99b1725 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 May 2009 15:07:12 -0700 Subject: [PATCH 23/91] pktgen: do not access flows[] beyond its length typo -- pkt_dev->nflows is for stats only, the number of concurrent flows is stored in cflows. Reported-By: Vladimir Ivashchenko Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 3779c1438c11..0666a827bc62 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2447,7 +2447,7 @@ static inline void free_SAs(struct pktgen_dev *pkt_dev) if (pkt_dev->cflows) { /* let go of the SAs if we have them */ int i = 0; - for (; i < pkt_dev->nflows; i++){ + for (; i < pkt_dev->cflows; i++) { struct xfrm_state *x = pkt_dev->flows[i].x; if (x) { xfrm_state_put(x); From 3ed18d76d959e5cbfa5d70c8f7ba95476582a556 Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Thu, 21 May 2009 15:20:59 -0700 Subject: [PATCH 24/91] ipv4: Fix oops with FIB_TRIE It seems we can fix this by disabling preemption while we re-balance the trie. This is with the CONFIG_CLASSIC_RCU. It's been stress-tested at high loads continuesly taking a full BGP table up/down via iproute -batch. Note. fib_trie is not updated for CONFIG_PREEMPT_RCU Reported-by: Andrei Popa Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ec0ae490f0b6..33c7c85dfe40 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -986,9 +986,12 @@ fib_find_node(struct trie *t, u32 key) static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; - t_key cindex, key = tn->key; + t_key cindex, key; struct tnode *tp; + preempt_disable(); + key = tn->key; + while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); @@ -1007,6 +1010,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); + preempt_enable(); return (struct node *)tn; } From 0975ecba3b670df7c488a5e0e6fe9f1f370a8ad8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 May 2009 15:22:02 -0700 Subject: [PATCH 25/91] RxRPC: Error handling for rxrpc_alloc_connection() rxrpc_alloc_connection() doesn't return an error code on failure, it just returns NULL. IS_ERR(NULL) is false. Signed-off-by: Dan Carpenter Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/ar-connection.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 0f1218b8d289..67e38a056240 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -343,9 +343,9 @@ static int rxrpc_connect_exclusive(struct rxrpc_sock *rx, /* not yet present - create a candidate for a new connection * and then redo the check */ conn = rxrpc_alloc_connection(gfp); - if (IS_ERR(conn)) { - _leave(" = %ld", PTR_ERR(conn)); - return PTR_ERR(conn); + if (!conn) { + _leave(" = -ENOMEM"); + return -ENOMEM; } conn->trans = trans; @@ -508,9 +508,9 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, /* not yet present - create a candidate for a new connection and then * redo the check */ candidate = rxrpc_alloc_connection(gfp); - if (IS_ERR(candidate)) { - _leave(" = %ld", PTR_ERR(candidate)); - return PTR_ERR(candidate); + if (!candidate) { + _leave(" = -ENOMEM"); + return -ENOMEM; } candidate->trans = trans; From 88dff4936c0a5fa53080cca68dc963a8a2a674b0 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 22 May 2009 11:35:50 +0800 Subject: [PATCH 26/91] x86: DMI match for the Sony VGN-Z540N as it needs BIOS reboot x86: DMI match for the Sony VGN-Z540N as it needs BIOS reboot, see: http://bugzilla.kernel.org/show_bug.cgi?id=12901 [ Impact: fix hung reboot on certain systems ] Signed-off-by: Zhang Rui Cc: Len Brown LKML-Reference: <1242963350.32574.53.camel@rzhang-dt> Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 1340dad417f4..667188e0b5a0 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -232,6 +232,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"), }, }, + { /* Handle problems with rebooting on Sony VGN-Z540N */ + .callback = set_bios_reboot, + .ident = "Sony VGN-Z540N", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"), + }, + }, { } }; From e069c0cf7c169ae5a8bfdc8d083a5d66fbef73d8 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Fri, 8 May 2009 15:51:44 -0700 Subject: [PATCH 27/91] wimax/i2400m: usb: fix device reset on autosuspend while not yet idle When the i2400m is connected to a network, the host interface (USB) cannot be suspended. For that to happen, the device has to have negotiated with the basestation to put the link on IDLE state. If the host tries to put the device in standby while it is connected but not idle, the device resets, as the driver should not do that. To avoid triggering that, when the USB susbsytem requires the driver to autosuspend the device, the driver checks if the device is not yet idle. If it is not, the request is rejected (will be retried again later on after the autosuspend timeout). At some point the device will enter idle and the request will succeed (unless of course, there is network traffic, but at that point, there is no idle neither in the link or the host interface). Signed-off-by: Inaky Perez-Gonzalez --- drivers/net/wimax/i2400m/usb.c | 35 +++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index ca4151a9e222..17851321b7fd 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -505,27 +505,52 @@ int i2400mu_suspend(struct usb_interface *iface, pm_message_t pm_msg) #ifdef CONFIG_PM struct usb_device *usb_dev = i2400mu->usb_dev; #endif + unsigned is_autosuspend = 0; struct i2400m *i2400m = &i2400mu->i2400m; +#ifdef CONFIG_PM + if (usb_dev->auto_pm > 0) + is_autosuspend = 1; +#endif + d_fnstart(3, dev, "(iface %p pm_msg %u)\n", iface, pm_msg.event); if (i2400m->updown == 0) goto no_firmware; - d_printf(1, dev, "fw up, requesting standby\n"); + if (i2400m->state == I2400M_SS_DATA_PATH_CONNECTED && is_autosuspend) { + /* ugh -- the device is connected and this suspend + * request is an autosuspend one (not a system standby + * / hibernate). + * + * The only way the device can go to standby is if the + * link with the base station is in IDLE mode; that + * were the case, we'd be in status + * I2400M_SS_CONNECTED_IDLE. But we are not. + * + * If we *tell* him to go power save now, it'll reset + * as a precautionary measure, so if this is an + * autosuspend thing, say no and it'll come back + * later, when the link is IDLE + */ + result = -EBADF; + d_printf(1, dev, "fw up, link up, not-idle, autosuspend: " + "not entering powersave\n"); + goto error_not_now; + } + d_printf(1, dev, "fw up: entering powersave\n"); atomic_dec(&i2400mu->do_autopm); result = i2400m_cmd_enter_powersave(i2400m); atomic_inc(&i2400mu->do_autopm); -#ifdef CONFIG_PM - if (result < 0 && usb_dev->auto_pm == 0) { + if (result < 0 && !is_autosuspend) { /* System suspend, can't fail */ dev_err(dev, "failed to suspend, will reset on resume\n"); result = 0; } -#endif if (result < 0) goto error_enter_powersave; i2400mu_notification_release(i2400mu); - d_printf(1, dev, "fw up, got standby\n"); + d_printf(1, dev, "powersave requested\n"); error_enter_powersave: +error_not_now: no_firmware: d_fnend(3, dev, "(iface %p pm_msg %u) = %d\n", iface, pm_msg.event, result); From bca23dba760d6705c013f89113c46570378fb626 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 21 May 2009 11:46:16 -0700 Subject: [PATCH 28/91] x86, setup: revert ACPI 3 E820 extended attributes support Remove ACPI 3 E820 extended memory attributes support. At least one vendor actively set all the flags to zero, but left ECX on return at 24. This bug may be present in other BIOSes. The breakage functionally means the ACPI 3 flags are probably completely useless, and that no OS any time soon is going to rely on their existence. Therefore, drop support completely. We may want to revisit this question in the future, if we find ourselves actually needing the flags. This reverts all or part of the following checkins: cd670599b7b00d9263f6f11a05c0edeb9cbedaf3 c549e71d073a6e9a4847497344db28a784061455 However, retain the part from the latter commit that copies e820 into a temporary buffer; that is an unrelated BIOS workaround. Put in a comment to explain that part. See https://bugzilla.redhat.com/show_bug.cgi?id=499396 for some additional information. [ Impact: detect all memory on affected machines ] Reported-by: Thomas J. Baker Signed-off-by: H. Peter Anvin Acked-by: Len Brown Cc: Chuck Ebbert Cc: Kyle McMartin Cc: Matt Domsch --- arch/x86/boot/memory.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index 5054c2ddd1a0..74b3d2ba84e9 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -17,11 +17,6 @@ #define SMAP 0x534d4150 /* ASCII "SMAP" */ -struct e820_ext_entry { - struct e820entry std; - u32 ext_flags; -} __attribute__((packed)); - static int detect_memory_e820(void) { int count = 0; @@ -29,13 +24,21 @@ static int detect_memory_e820(void) u32 size, id, edi; u8 err; struct e820entry *desc = boot_params.e820_map; - static struct e820_ext_entry buf; /* static so it is zeroed */ + static struct e820entry buf; /* static so it is zeroed */ /* - * Set this here so that if the BIOS doesn't change this field - * but still doesn't change %ecx, we're still okay... + * Note: at least one BIOS is known which assumes that the + * buffer pointed to by one e820 call is the same one as + * the previous call, and only changes modified fields. Therefore, + * we use a temporary buffer and copy the results entry by entry. + * + * This routine deliberately does not try to account for + * ACPI 3+ extended attributes. This is because there are + * BIOSes in the field which report zero for the valid bit for + * all ranges, and we don't currently make any use of the + * other attribute bits. Revisit this if we see the extended + * attribute bits deployed in a meaningful way in the future. */ - buf.ext_flags = 1; do { size = sizeof buf; @@ -66,13 +69,7 @@ static int detect_memory_e820(void) break; } - /* ACPI 3.0 added the extended flags support. If bit 0 - in the extended flags is zero, we're supposed to simply - ignore the entry -- a backwards incompatible change! */ - if (size > 20 && !(buf.ext_flags & 1)) - continue; - - *desc++ = buf.std; + *desc++ = buf; count++; } while (next && count < ARRAY_SIZE(boot_params.e820_map)); From 14b60391587ab9b2207c4fb6281763a93ae85e0f Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 20 May 2009 16:47:08 -0400 Subject: [PATCH 29/91] i915: support 8xx desktop cursors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reason we never added 8xx desktop cursor support to the kernel. This patch fixes that. [krh: Also set the size on pre-i915 hw.] Tested-by: Kristian Høgsberg Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/i915_reg.h | 17 +++++++++++++++++ drivers/gpu/drm/i915/intel_display.c | 26 ++++++++++++++++++++------ 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4a24c90fb940..717b6a854bcd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1145,7 +1145,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) mutex_unlock(&dev->struct_mutex); return VM_FAULT_SIGBUS; } - list_add(&obj_priv->list, &dev_priv->mm.inactive_list); + list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); } /* Need a new fence register? */ @@ -1375,7 +1375,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev->struct_mutex); return ret; } - list_add(&obj_priv->list, &dev_priv->mm.inactive_list); + list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); } drm_gem_object_unreference(obj); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 15da44cf21b1..9668cc0d7f4e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1410,9 +1410,25 @@ /* Cursor A & B regs */ #define CURACNTR 0x70080 +/* Old style CUR*CNTR flags (desktop 8xx) */ +#define CURSOR_ENABLE 0x80000000 +#define CURSOR_GAMMA_ENABLE 0x40000000 +#define CURSOR_STRIDE_MASK 0x30000000 +#define CURSOR_FORMAT_SHIFT 24 +#define CURSOR_FORMAT_MASK (0x07 << CURSOR_FORMAT_SHIFT) +#define CURSOR_FORMAT_2C (0x00 << CURSOR_FORMAT_SHIFT) +#define CURSOR_FORMAT_3C (0x01 << CURSOR_FORMAT_SHIFT) +#define CURSOR_FORMAT_4C (0x02 << CURSOR_FORMAT_SHIFT) +#define CURSOR_FORMAT_ARGB (0x04 << CURSOR_FORMAT_SHIFT) +#define CURSOR_FORMAT_XRGB (0x05 << CURSOR_FORMAT_SHIFT) +/* New style CUR*CNTR flags */ +#define CURSOR_MODE 0x27 #define CURSOR_MODE_DISABLE 0x00 #define CURSOR_MODE_64_32B_AX 0x07 #define CURSOR_MODE_64_ARGB_AX ((1 << 5) | CURSOR_MODE_64_32B_AX) +#define MCURSOR_PIPE_SELECT (1 << 28) +#define MCURSOR_PIPE_A 0x00 +#define MCURSOR_PIPE_B (1 << 28) #define MCURSOR_GAMMA_ENABLE (1 << 26) #define CURABASE 0x70084 #define CURAPOS 0x70088 @@ -1420,6 +1436,7 @@ #define CURSOR_POS_SIGN 0x8000 #define CURSOR_X_SHIFT 0 #define CURSOR_Y_SHIFT 16 +#define CURSIZE 0x700a0 #define CURBCNTR 0x700c0 #define CURBBASE 0x700c4 #define CURBPOS 0x700c8 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3387cf32f385..c9d6f10ba92e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1357,7 +1357,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, int pipe = intel_crtc->pipe; uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR; uint32_t base = (pipe == 0) ? CURABASE : CURBBASE; - uint32_t temp; + uint32_t temp = I915_READ(control); size_t addr; int ret; @@ -1366,7 +1366,12 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, /* if we want to turn off the cursor ignore width and height */ if (!handle) { DRM_DEBUG("cursor off\n"); - temp = CURSOR_MODE_DISABLE; + if (IS_MOBILE(dev) || IS_I9XX(dev)) { + temp &= ~(CURSOR_MODE | MCURSOR_GAMMA_ENABLE); + temp |= CURSOR_MODE_DISABLE; + } else { + temp &= ~(CURSOR_ENABLE | CURSOR_GAMMA_ENABLE); + } addr = 0; bo = NULL; mutex_lock(&dev->struct_mutex); @@ -1409,10 +1414,19 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, addr = obj_priv->phys_obj->handle->busaddr; } - temp = 0; - /* set the pipe for the cursor */ - temp |= (pipe << 28); - temp |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE; + if (!IS_I9XX(dev)) + I915_WRITE(CURSIZE, (height << 12) | width); + + /* Hooray for CUR*CNTR differences */ + if (IS_MOBILE(dev) || IS_I9XX(dev)) { + temp &= ~(CURSOR_MODE | MCURSOR_PIPE_SELECT); + temp |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE; + temp |= (pipe << 28); /* Connect to correct pipe */ + } else { + temp &= ~(CURSOR_FORMAT_MASK); + temp |= CURSOR_ENABLE; + temp |= CURSOR_FORMAT_ARGB | CURSOR_GAMMA_ENABLE; + } finish: I915_WRITE(control, temp); From 8863170628da4b0b461eb96bf797df1dca0bd03e Mon Sep 17 00:00:00 2001 From: Ma Ling Date: Wed, 13 May 2009 11:19:55 +0800 Subject: [PATCH 30/91] drm/i915: Fetch SDVO LVDS mode lines from VBT, then reserve them Signed-off-by: Ma Ling Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/intel_bios.c | 104 +++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_bios.h | 17 +++++ drivers/gpu/drm/i915/intel_lvds.c | 4 +- 4 files changed, 94 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9b149fe824c3..c431fa54bbb5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -180,7 +180,8 @@ typedef struct drm_i915_private { int backlight_duty_cycle; /* restore backlight to this value */ bool panel_wants_dither; struct drm_display_mode *panel_fixed_mode; - struct drm_display_mode *vbt_mode; /* if any */ + struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */ + struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */ /* Feature bits from the VBIOS */ unsigned int int_tv_support:1; diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index fc28e2bbd542..9d78cff33b24 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -57,9 +57,43 @@ find_section(struct bdb_header *bdb, int section_id) return NULL; } -/* Try to find panel data */ static void -parse_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb) +fill_detail_timing_data(struct drm_display_mode *panel_fixed_mode, + struct lvds_dvo_timing *dvo_timing) +{ + panel_fixed_mode->hdisplay = (dvo_timing->hactive_hi << 8) | + dvo_timing->hactive_lo; + panel_fixed_mode->hsync_start = panel_fixed_mode->hdisplay + + ((dvo_timing->hsync_off_hi << 8) | dvo_timing->hsync_off_lo); + panel_fixed_mode->hsync_end = panel_fixed_mode->hsync_start + + dvo_timing->hsync_pulse_width; + panel_fixed_mode->htotal = panel_fixed_mode->hdisplay + + ((dvo_timing->hblank_hi << 8) | dvo_timing->hblank_lo); + + panel_fixed_mode->vdisplay = (dvo_timing->vactive_hi << 8) | + dvo_timing->vactive_lo; + panel_fixed_mode->vsync_start = panel_fixed_mode->vdisplay + + dvo_timing->vsync_off; + panel_fixed_mode->vsync_end = panel_fixed_mode->vsync_start + + dvo_timing->vsync_pulse_width; + panel_fixed_mode->vtotal = panel_fixed_mode->vdisplay + + ((dvo_timing->vblank_hi << 8) | dvo_timing->vblank_lo); + panel_fixed_mode->clock = dvo_timing->clock * 10; + panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED; + + /* Some VBTs have bogus h/vtotal values */ + if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal) + panel_fixed_mode->htotal = panel_fixed_mode->hsync_end + 1; + if (panel_fixed_mode->vsync_end > panel_fixed_mode->vtotal) + panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end + 1; + + drm_mode_set_name(panel_fixed_mode); +} + +/* Try to find integrated panel data */ +static void +parse_lfp_panel_data(struct drm_i915_private *dev_priv, + struct bdb_header *bdb) { struct bdb_lvds_options *lvds_options; struct bdb_lvds_lfp_data *lvds_lfp_data; @@ -91,35 +125,9 @@ parse_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb) panel_fixed_mode = drm_calloc(1, sizeof(*panel_fixed_mode), DRM_MEM_DRIVER); - panel_fixed_mode->hdisplay = (dvo_timing->hactive_hi << 8) | - dvo_timing->hactive_lo; - panel_fixed_mode->hsync_start = panel_fixed_mode->hdisplay + - ((dvo_timing->hsync_off_hi << 8) | dvo_timing->hsync_off_lo); - panel_fixed_mode->hsync_end = panel_fixed_mode->hsync_start + - dvo_timing->hsync_pulse_width; - panel_fixed_mode->htotal = panel_fixed_mode->hdisplay + - ((dvo_timing->hblank_hi << 8) | dvo_timing->hblank_lo); + fill_detail_timing_data(panel_fixed_mode, dvo_timing); - panel_fixed_mode->vdisplay = (dvo_timing->vactive_hi << 8) | - dvo_timing->vactive_lo; - panel_fixed_mode->vsync_start = panel_fixed_mode->vdisplay + - dvo_timing->vsync_off; - panel_fixed_mode->vsync_end = panel_fixed_mode->vsync_start + - dvo_timing->vsync_pulse_width; - panel_fixed_mode->vtotal = panel_fixed_mode->vdisplay + - ((dvo_timing->vblank_hi << 8) | dvo_timing->vblank_lo); - panel_fixed_mode->clock = dvo_timing->clock * 10; - panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED; - - /* Some VBTs have bogus h/vtotal values */ - if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal) - panel_fixed_mode->htotal = panel_fixed_mode->hsync_end + 1; - if (panel_fixed_mode->vsync_end > panel_fixed_mode->vtotal) - panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end + 1; - - drm_mode_set_name(panel_fixed_mode); - - dev_priv->vbt_mode = panel_fixed_mode; + dev_priv->lfp_lvds_vbt_mode = panel_fixed_mode; DRM_DEBUG("Found panel mode in BIOS VBT tables:\n"); drm_mode_debug_printmodeline(panel_fixed_mode); @@ -127,6 +135,39 @@ parse_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb) return; } +/* Try to find sdvo panel data */ +static void +parse_sdvo_panel_data(struct drm_i915_private *dev_priv, + struct bdb_header *bdb) +{ + struct bdb_sdvo_lvds_options *sdvo_lvds_options; + struct lvds_dvo_timing *dvo_timing; + struct drm_display_mode *panel_fixed_mode; + + dev_priv->sdvo_lvds_vbt_mode = NULL; + + sdvo_lvds_options = find_section(bdb, BDB_SDVO_LVDS_OPTIONS); + if (!sdvo_lvds_options) + return; + + dvo_timing = find_section(bdb, BDB_SDVO_PANEL_DTDS); + if (!dvo_timing) + return; + + panel_fixed_mode = drm_calloc(1, sizeof(*panel_fixed_mode), + DRM_MEM_DRIVER); + + if (!panel_fixed_mode) + return; + + fill_detail_timing_data(panel_fixed_mode, + dvo_timing + sdvo_lvds_options->panel_type); + + dev_priv->sdvo_lvds_vbt_mode = panel_fixed_mode; + + return; +} + static void parse_general_features(struct drm_i915_private *dev_priv, struct bdb_header *bdb) @@ -199,7 +240,8 @@ intel_init_bios(struct drm_device *dev) /* Grab useful general definitions */ parse_general_features(dev_priv, bdb); - parse_panel_data(dev_priv, bdb); + parse_lfp_panel_data(dev_priv, bdb); + parse_sdvo_panel_data(dev_priv, bdb); pci_unmap_rom(pdev, bios); diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h index de621aad85b5..8ca2cde15804 100644 --- a/drivers/gpu/drm/i915/intel_bios.h +++ b/drivers/gpu/drm/i915/intel_bios.h @@ -279,6 +279,23 @@ struct vch_bdb_22 { struct vch_panel_data panels[16]; } __attribute__((packed)); +struct bdb_sdvo_lvds_options { + u8 panel_backlight; + u8 h40_set_panel_type; + u8 panel_type; + u8 ssc_clk_freq; + u16 als_low_trip; + u16 als_high_trip; + u8 sclalarcoeff_tab_row_num; + u8 sclalarcoeff_tab_row_size; + u8 coefficient[8]; + u8 panel_misc_bits_1; + u8 panel_misc_bits_2; + u8 panel_misc_bits_3; + u8 panel_misc_bits_4; +} __attribute__((packed)); + + bool intel_init_bios(struct drm_device *dev); /* diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 439a86514993..53731f0ffcb5 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -511,10 +511,10 @@ void intel_lvds_init(struct drm_device *dev) } /* Failed to get EDID, what about VBT? */ - if (dev_priv->vbt_mode) { + if (dev_priv->lfp_lvds_vbt_mode) { mutex_lock(&dev->mode_config.mutex); dev_priv->panel_fixed_mode = - drm_mode_duplicate(dev, dev_priv->vbt_mode); + drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode); mutex_unlock(&dev->mode_config.mutex); if (dev_priv->panel_fixed_mode) { dev_priv->panel_fixed_mode->type |= From 7086c87fb1446ceb37918ffa0941359a7c2ec6cf Mon Sep 17 00:00:00 2001 From: Ma Ling Date: Wed, 13 May 2009 11:20:06 +0800 Subject: [PATCH 31/91] drm/i915: Return SDVO LVDS VBT mode if no EDID modes are detected. Some new SDVO LVDS hardware doesn't have DDC available, and this should fix the display on it. Signed-off-by: Ma Ling Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_sdvo.c | 41 +++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 9913651c1e17..f79ebf4de63c 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -69,6 +69,10 @@ struct intel_sdvo_priv { * This is set if we treat the device as HDMI, instead of DVI. */ bool is_hdmi; + /** + * This is set if we detect output of sdvo device as LVDS. + */ + bool is_lvds; /** * Returned SDTV resolutions allowed for the current format, if the @@ -1543,6 +1547,37 @@ static void intel_sdvo_get_tv_modes(struct drm_connector *connector) } } +static void intel_sdvo_get_lvds_modes(struct drm_connector *connector) +{ + struct intel_output *intel_output = to_intel_output(connector); + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; + struct drm_i915_private *dev_priv = connector->dev->dev_private; + + /* + * Attempt to get the mode list from DDC. + * Assume that the preferred modes are + * arranged in priority order. + */ + /* set the bus switch and get the modes */ + intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus); + intel_ddc_get_modes(intel_output); + if (list_empty(&connector->probed_modes) == false) + return; + + /* Fetch modes from VBT */ + if (dev_priv->sdvo_lvds_vbt_mode != NULL) { + struct drm_display_mode *newmode; + newmode = drm_mode_duplicate(connector->dev, + dev_priv->sdvo_lvds_vbt_mode); + if (newmode != NULL) { + /* Guarantee the mode is preferred */ + newmode->type = (DRM_MODE_TYPE_PREFERRED | + DRM_MODE_TYPE_DRIVER); + drm_mode_probed_add(connector, newmode); + } + } +} + static int intel_sdvo_get_modes(struct drm_connector *connector) { struct intel_output *output = to_intel_output(connector); @@ -1550,6 +1585,8 @@ static int intel_sdvo_get_modes(struct drm_connector *connector) if (sdvo_priv->is_tv) intel_sdvo_get_tv_modes(connector); + else if (sdvo_priv->is_lvds == true) + intel_sdvo_get_lvds_modes(connector); else intel_sdvo_get_ddc_modes(connector); @@ -1720,6 +1757,8 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) } } + /* In defaut case sdvo lvds is false */ + sdvo_priv->is_lvds = false; intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps); if (sdvo_priv->caps.output_flags & @@ -1773,6 +1812,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_LVDS; connector_type = DRM_MODE_CONNECTOR_LVDS; + sdvo_priv->is_lvds = true; } else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS1) { @@ -1780,6 +1820,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_LVDS; connector_type = DRM_MODE_CONNECTOR_LVDS; + sdvo_priv->is_lvds = true; } else { From ad5b2a6db3eddc41358d8a73f5cfe1c38e7e3a19 Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Fri, 15 May 2009 09:10:41 +0200 Subject: [PATCH 32/91] drm/i915: Determine type before initialising connector drm_connector_init sets both the connector type and the connector type_id on the newly initialised connector. As the connector type_id is coupled to the connector type, the connector type cannot simply be modified on an initialised connector. This patch changes the order of operations on intel_sdvo_init so that the type is determined before the connector is intialised. This fixes a bug whereby the name card0-VGA-1 would be allocted to both a CRT and an SDVO connector since the SDVO connector would be initialised with type 'unknown' and hence have its type_id assigned from the wrong pool. Signed-off-by: Jonas Bonn Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_sdvo.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index f79ebf4de63c..ded122c1ae2d 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1713,17 +1713,9 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) return false; } - connector = &intel_output->base; - - drm_connector_init(dev, connector, &intel_sdvo_connector_funcs, - DRM_MODE_CONNECTOR_Unknown); - drm_connector_helper_add(connector, &intel_sdvo_connector_helper_funcs); sdvo_priv = (struct intel_sdvo_priv *)(intel_output + 1); intel_output->type = INTEL_OUTPUT_SDVO; - connector->interlace_allowed = 0; - connector->doublescan_allowed = 0; - /* setup the DDC bus. */ if (output_device == SDVOB) i2cbus = intel_i2c_create(dev, GPIOE, "SDVOCTRL_E for SDVOB"); @@ -1731,7 +1723,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) i2cbus = intel_i2c_create(dev, GPIOE, "SDVOCTRL_E for SDVOC"); if (!i2cbus) - goto err_connector; + goto err_inteloutput; sdvo_priv->i2c_bus = i2cbus; @@ -1747,7 +1739,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) intel_output->i2c_bus = i2cbus; intel_output->dev_priv = sdvo_priv; - /* Read the regs to test if we can talk to the device */ for (i = 0; i < 0x40; i++) { if (!intel_sdvo_read_byte(intel_output, i, &ch[i])) { @@ -1768,7 +1759,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) else sdvo_priv->controlled_output = SDVO_OUTPUT_TMDS1; - connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_TMDS; connector_type = DRM_MODE_CONNECTOR_DVID; @@ -1786,7 +1776,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_SVID0) { sdvo_priv->controlled_output = SDVO_OUTPUT_SVID0; - connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_TVDAC; connector_type = DRM_MODE_CONNECTOR_SVIDEO; sdvo_priv->is_tv = true; @@ -1795,21 +1784,18 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB0) { sdvo_priv->controlled_output = SDVO_OUTPUT_RGB0; - connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_DAC; connector_type = DRM_MODE_CONNECTOR_VGA; } else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB1) { sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1; - connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_DAC; connector_type = DRM_MODE_CONNECTOR_VGA; } else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS0) { sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0; - connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_LVDS; connector_type = DRM_MODE_CONNECTOR_LVDS; sdvo_priv->is_lvds = true; @@ -1817,7 +1803,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS1) { sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS1; - connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_LVDS; connector_type = DRM_MODE_CONNECTOR_LVDS; sdvo_priv->is_lvds = true; @@ -1836,9 +1821,16 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) goto err_i2c; } + connector = &intel_output->base; + drm_connector_init(dev, connector, &intel_sdvo_connector_funcs, + connector_type); + drm_connector_helper_add(connector, &intel_sdvo_connector_helper_funcs); + connector->interlace_allowed = 0; + connector->doublescan_allowed = 0; + connector->display_info.subpixel_order = SubPixelHorizontalRGB; + drm_encoder_init(dev, &intel_output->enc, &intel_sdvo_enc_funcs, encoder_type); drm_encoder_helper_add(&intel_output->enc, &intel_sdvo_helper_funcs); - connector->connector_type = connector_type; drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc); drm_sysfs_connector_add(connector); @@ -1876,8 +1868,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) err_i2c: intel_i2c_destroy(intel_output->i2c_bus); -err_connector: - drm_connector_cleanup(connector); +err_inteloutput: kfree(intel_output); return false; From 0c752a93353d9b17dbe148312d732fbe06d235e1 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 22 May 2009 12:17:45 -0700 Subject: [PATCH 33/91] x86: introduce noxsave boot parameter Introduce "noxsave" boot parameter which will disable the cpu's xsave/xrstor capabilities. Useful for debugging and working around xsave related issues. [ Impact: make it possible to debug problems in the field ] Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 4 ++++ arch/x86/kernel/cpu/common.c | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e87bdbfbcc75..fd5cac013037 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1535,6 +1535,10 @@ and is between 256 and 4096 characters. It is defined in the file register save and restore. The kernel will only save legacy floating-point registers on task switch. + noxsave [BUGS=X86] Disables x86 extended register state save + and restore using xsave. The kernel will fallback to + enabling legacy floating-point and sse state. + nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or wfi(ARM) instruction doesn't work correctly and not to use it. This is also useful when using JTAG debugger. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c1caefc82e62..77848d9fca68 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -114,6 +114,13 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +static int __init x86_xsave_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + return 1; +} +__setup("noxsave", x86_xsave_setup); + #ifdef CONFIG_X86_32 static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; From 619ac3b75a1e9b2df66857f6a0fb466f1da5fa9e Mon Sep 17 00:00:00 2001 From: Ma Ling Date: Mon, 18 May 2009 16:12:46 +0800 Subject: [PATCH 34/91] drm/i915: Use an I2C algo to do the flip to SDVO DDC bus. Previously, we would set the control bus switch before calls were made to request EDID information over DDC. But recently the DDC code started doing multiple I2C transfers to get the EDID extensions as well. This tripped up SDVO, because the control bus switch is only in effect until the next STOP after a START. By doing our own algo, we can wrap each i2c transaction on the DDC I2C bus with the control bus switch it requires. freedesktop.org bug #21042 Signed-off-by: Ma Ling [anholt: Hand application for conflict, fixed error path] Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_sdvo.c | 67 +++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index ded122c1ae2d..f3ef6bfd8ffc 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1402,10 +1402,8 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connect static void intel_sdvo_get_ddc_modes(struct drm_connector *connector) { struct intel_output *intel_output = to_intel_output(connector); - struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; /* set the bus switch and get the modes */ - intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus); intel_ddc_get_modes(intel_output); #if 0 @@ -1601,6 +1599,9 @@ static void intel_sdvo_destroy(struct drm_connector *connector) if (intel_output->i2c_bus) intel_i2c_destroy(intel_output->i2c_bus); + if (intel_output->ddc_bus) + intel_i2c_destroy(intel_output->ddc_bus); + drm_sysfs_connector_remove(connector); drm_connector_cleanup(connector); kfree(intel_output); @@ -1697,12 +1698,56 @@ intel_sdvo_get_digital_encoding_mode(struct intel_output *output) return true; } +static struct intel_output * +intel_sdvo_chan_to_intel_output(struct intel_i2c_chan *chan) +{ + struct drm_device *dev = chan->drm_dev; + struct drm_connector *connector; + struct intel_output *intel_output = NULL; + + list_for_each_entry(connector, + &dev->mode_config.connector_list, head) { + if (to_intel_output(connector)->ddc_bus == chan) { + intel_output = to_intel_output(connector); + break; + } + } + return intel_output; +} + +static int intel_sdvo_master_xfer(struct i2c_adapter *i2c_adap, + struct i2c_msg msgs[], int num) +{ + struct intel_output *intel_output; + struct intel_sdvo_priv *sdvo_priv; + struct i2c_algo_bit_data *algo_data; + struct i2c_algorithm *algo; + + algo_data = (struct i2c_algo_bit_data *)i2c_adap->algo_data; + intel_output = + intel_sdvo_chan_to_intel_output( + (struct intel_i2c_chan *)(algo_data->data)); + if (intel_output == NULL) + return -EINVAL; + + sdvo_priv = intel_output->dev_priv; + algo = (struct i2c_algorithm *)intel_output->i2c_bus->adapter.algo; + + intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus); + return algo->master_xfer(i2c_adap, msgs, num); +} + +static struct i2c_algorithm intel_sdvo_i2c_bit_algo = { + .master_xfer = intel_sdvo_master_xfer, +}; + bool intel_sdvo_init(struct drm_device *dev, int output_device) { struct drm_connector *connector; struct intel_output *intel_output; struct intel_sdvo_priv *sdvo_priv; struct intel_i2c_chan *i2cbus = NULL; + struct intel_i2c_chan *ddcbus = NULL; int connector_type; u8 ch[0x40]; int i; @@ -1748,6 +1793,20 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) } } + /* setup the DDC bus. */ + if (output_device == SDVOB) + ddcbus = intel_i2c_create(dev, GPIOE, "SDVOB DDC BUS"); + else + ddcbus = intel_i2c_create(dev, GPIOE, "SDVOC DDC BUS"); + + if (ddcbus == NULL) + goto err_i2c; + + intel_sdvo_i2c_bit_algo.functionality = + intel_output->i2c_bus->adapter.algo->functionality; + ddcbus->adapter.algo = &intel_sdvo_i2c_bit_algo; + intel_output->ddc_bus = ddcbus; + /* In defaut case sdvo lvds is false */ sdvo_priv->is_lvds = false; intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps); @@ -1862,11 +1921,11 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) sdvo_priv->caps.output_flags & (SDVO_OUTPUT_TMDS1 | SDVO_OUTPUT_RGB1) ? 'Y' : 'N'); - intel_output->ddc_bus = i2cbus; - return true; err_i2c: + if (ddcbus != NULL) + intel_i2c_destroy(intel_output->ddc_bus); intel_i2c_destroy(intel_output->i2c_bus); err_inteloutput: kfree(intel_output); From 0b827537e339c084ac9384df588969d400be9e0d Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 22 May 2009 13:23:37 -0700 Subject: [PATCH 35/91] x86: bugfix wbinvd() model check instead of family check wbinvd is supported on all CPUs 486 or later. But, pageattr.c is checking x86_model >= 4 before wbinvd(), which looks like an oversight bug. It was first introduced at one place by changeset d7c8f21a8cad0228c7c5ce2bb6dbd95d1ee49d13 and got copied over to second place in the same file later. [ Impact: fix missing cache flush on early-model CPUs, potential data corruption ] Signed-off-by: Venkatesh Pallipadi Signed-off-by: H. Peter Anvin --- arch/x86/mm/pageattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 797f9f107cb6..2cc019a3f71b 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -153,7 +153,7 @@ static void __cpa_flush_all(void *arg) */ __flush_tlb_all(); - if (cache && boot_cpu_data.x86_model >= 4) + if (cache && boot_cpu_data.x86 >= 4) wbinvd(); } @@ -218,7 +218,7 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, /* 4M threshold */ if (numpages >= 1024) { - if (boot_cpu_data.x86_model >= 4) + if (boot_cpu_data.x86 >= 4) wbinvd(); return; } From 0af48f42df15b97080b450d24219dd95db7b929a Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 22 May 2009 13:23:38 -0700 Subject: [PATCH 36/91] x86: cpa_flush_array wbinvd should be done on all CPUs cpa_flush_array seems to prefer wbinvd() over clflush at 4M threshold. clflush needs to be done on only one CPU as per instruction definition. wbinvd() however, should be done on all CPUs. [ Impact: fix missing flush which could cause data corruption ] Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/mm/pageattr.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2cc019a3f71b..0f9052bcec4b 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -204,6 +204,11 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) } } +static void wbinvd_local(void *unused) +{ + wbinvd(); +} + static void cpa_flush_array(unsigned long *start, int numpages, int cache, int in_flags, struct page **pages) { @@ -219,7 +224,8 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, /* 4M threshold */ if (numpages >= 1024) { if (boot_cpu_data.x86 >= 4) - wbinvd(); + on_each_cpu(wbinvd_local, NULL, 1); + return; } /* From 95caa0a9bdaf93607bd0cc8932f53112496f2f22 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 22 May 2009 21:30:39 -0300 Subject: [PATCH 37/91] icom: fix rmmod crash Actually the icom driver is crashing when is being removed because the driver is kfreeing the adapter structure before calling pci_release_regions(), which result in the following error: Unable to handle kernel paging request for data at address 0x6b6b6b6b6b6b6d33 Faulting instruction address: 0xc000000000246b80 Oops: Kernel access of bad area, sig: 11 [#1] .... [c000000012d436a0] [c0000000001002d0] .kfree+0x120/0x34c (unreliable) [c000000012d43730] [c000000000246d60] .pci_release_selected_regions+0x3c/0x68 [c000000012d437c0] [d000000002d54700] .icom_kref_release+0xf4/0x118 [icom] [c000000012d43850] [c000000000232e50] .kref_put+0x74/0x94 [c000000012d438d0] [d000000002d56c58] .icom_remove+0x40/0xa4 [icom] [c000000012d43960] [c000000000249e48] .pci_device_remove+0x50/0x90 [c000000012d439e0] [c0000000002d68d8] .__device_release_driver+0x94/0xd4 [c000000012d43a70] [c0000000002d7104] .driver_detach+0xf8/0x12c [c000000012d43b00] [c0000000002d549c] .bus_remove_driver+0xbc/0x11c [c000000012d43b90] [c0000000002d71dc] .driver_unregister+0x60/0x80 [c000000012d43c20] [c00000000024a07c] .pci_unregister_driver+0x44/0xe8 [c000000012d43cb0] [d000000002d56bf4] .icom_exit+0x1c/0x40 [icom] [c000000012d43d30] [c000000000095fa8] .SyS_delete_module+0x214/0x2a8 [c000000012d43e30] [c00000000000852c] syscall_exit+0x0/0x40 Signed-off-by: Breno Leitao Cc: stable@kernel.org Cc: Alan Cox Signed-off-by: Linus Torvalds --- drivers/serial/icom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c index 6579e2be1dd1..a461b3b2c72d 100644 --- a/drivers/serial/icom.c +++ b/drivers/serial/icom.c @@ -1472,8 +1472,8 @@ static void icom_remove_adapter(struct icom_adapter *icom_adapter) free_irq(icom_adapter->pci_dev->irq, (void *) icom_adapter); iounmap(icom_adapter->base_addr); - icom_free_adapter(icom_adapter); pci_release_regions(icom_adapter->pci_dev); + icom_free_adapter(icom_adapter); } static void icom_kref_release(struct kref *kref) From 8db14ca12569fe885694bd3d5ff84c2d973d3cb0 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 23 May 2009 18:57:25 +0000 Subject: [PATCH 38/91] [CIFS] Avoid open on possible directories since Samba now rejects them Small change (mostly formatting) to limit lookup based open calls to file create only. After discussion yesteday on samba-technical about the posix lookup regression, and looking at a problem with cifs posix open to one particular Samba version, Jeff and JRA realized that Samba server's behavior changed in this area (posix open behavior on files vs. directories). To make this behavior consistent, JRA just made a fix to Samba server to alter how it handles open of directories (now returning the equivalent of EISDIR instead of success). Since we don't know at lookup time whether the inode is a directory or file (and thus whether posix open will succeed with most current Samba server), this change avoids the posix open code on lookup open (just issues posix open on creates). This gets the semantic benefits we want (atomicity, posix byte range locks, improved write semantics on newly created files) and file create still is fast, and we avoid the problem that Jeff noticed yesterday with "openat" (and some open directory calls) of non-cached directories to one version of Samba server, and will work with future Samba versions (which include the fix jra just pushed into Samba server). I confirmed this approach with jra yesterday and with Shirish today. Posix open is only called (at lookup time) for file create now. For opens (rather than creates), because we do not know if it is a file or directory yet, and current Samba no longer allows us to do posix open on dirs, we could end up wasting an open call on what turns out to be a dir. For file opens, we wait to call posix open till cifs_open. It could be added here (lookup) in the future but the performance tradeoff of the extra network request when EISDIR or EACCES is returned would have to be weighed against the 50% reduction in network traffic in the other paths. Reviewed-by: Shirish Pargaonkar Tested-by: Jeff Layton CC: Jeremy Allison Signed-off-by: Steve French --- fs/cifs/dir.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f49d684edd96..3758965d73d5 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -657,31 +657,36 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode)); + /* Posix open is only called (at lookup time) for file create now. + * For opens (rather than creates), because we do not know if it + * is a file or directory yet, and current Samba no longer allows + * us to do posix open on dirs, we could end up wasting an open call + * on what turns out to be a dir. For file opens, we wait to call posix + * open till cifs_open. It could be added here (lookup) in the future + * but the performance tradeoff of the extra network request when EISDIR + * or EACCES is returned would have to be weighed against the 50% + * reduction in network traffic in the other paths. + */ if (pTcon->unix_ext) { if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && - (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open) { - if (!((nd->intent.open.flags & O_CREAT) && - (nd->intent.open.flags & O_EXCL))) { - rc = cifs_posix_open(full_path, &newInode, + (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && + (nd->intent.open.flags & O_CREAT)) { + rc = cifs_posix_open(full_path, &newInode, parent_dir_inode->i_sb, nd->intent.open.create_mode, nd->intent.open.flags, &oplock, &fileHandle, xid); - /* - * This code works around a bug in - * samba posix open in samba versions 3.3.1 - * and earlier where create works - * but open fails with invalid parameter. - * If either of these error codes are - * returned, follow the normal lookup. - * Otherwise, the error during posix open - * is handled. - */ - if ((rc != -EINVAL) && (rc != -EOPNOTSUPP)) - posix_open = true; - else - pTcon->broken_posix_open = true; - } + /* + * The check below works around a bug in POSIX + * open in samba versions 3.3.1 and earlier where + * open could incorrectly fail with invalid parameter. + * If either that or op not supported returned, follow + * the normal lookup. + */ + if ((rc == 0) || (rc == -ENOENT)) + posix_open = true; + else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP)) + pTcon->broken_posix_open = true; } if (!posix_open) rc = cifs_get_inode_info_unix(&newInode, full_path, From 59a3759d0fe8d969888c741bb33f4946e4d3750d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 23 May 2009 14:47:00 -0700 Subject: [PATCH 39/91] Linux 2.6.30-rc7 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b57e1f539e83..739fd34a72a2 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 30 -EXTRAVERSION = -rc6 -NAME = Vindictive Armadillo +EXTRAVERSION = -rc7 +NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* # To see a list of typical targets execute "make help" From 93574844bc3906941b89d6b6f72e01e87413f3c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ozan=20=C3=87a=C4=9Flayan?= Date: Sat, 23 May 2009 15:00:04 +0300 Subject: [PATCH 40/91] ALSA: hda - Add forced codec-slots for ASUS W5Fm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ASUS W5Fm needs the fixed codec-slots to probe to override the BIOS problem like W5F. Tested-by: Alp Kılıç Signed-off-by: Ozan Çağlayan Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 21e99cfa8c49..3128e1a6bc65 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2141,6 +2141,7 @@ static struct snd_pci_quirk probe_mask_list[] __devinitdata = { /* including bogus ALC268 in slot#2 that conflicts with ALC888 */ SND_PCI_QUIRK(0x17c0, 0x4085, "Medion MD96630", 0x01), /* forced codec slots */ + SND_PCI_QUIRK(0x1043, 0x1262, "ASUS W5Fm", 0x103), SND_PCI_QUIRK(0x1046, 0x1262, "ASUS W5F", 0x103), {} }; From 679d92ed1403b6cf9a19aa42ec62b81cae1aa017 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 24 May 2009 19:00:08 +0200 Subject: [PATCH 41/91] ALSA: hda - Add 5stack-no-fp model for STAC927x The recent fix for the headphone volume control on IDT/STAC codecs resulted in the removal of invalid "Side" volume eventually. But, if the front panel doesn't exist, this setup could be regarded as a sort of regression, as reported in kernel bug #13250. Now as a workaround, a new model 5stack-no-fp is added so that the user without the front panel can choose this one explicitly. Reference: bko#13250 http://bugzilla.kernel.org/show_bug.cgi?id=13250 Signed-off-by: Takashi Iwai --- Documentation/sound/alsa/HD-Audio-Models.txt | 1 + sound/pci/hda/patch_sigmatel.c | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt index 8eec05bc079e..322869fc8a9e 100644 --- a/Documentation/sound/alsa/HD-Audio-Models.txt +++ b/Documentation/sound/alsa/HD-Audio-Models.txt @@ -334,6 +334,7 @@ STAC9227/9228/9229/927x ref-no-jd Reference board without HP/Mic jack detection 3stack D965 3stack 5stack D965 5stack + SPDIF + 5stack-no-fp D965 5stack without front panel dell-3stack Dell Dimension E520 dell-bios Fixes with Dell BIOS setup auto BIOS setup (default) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 03b3646018a1..d2fd8ef6aef8 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -150,6 +150,7 @@ enum { STAC_D965_REF, STAC_D965_3ST, STAC_D965_5ST, + STAC_D965_5ST_NO_FP, STAC_DELL_3ST, STAC_DELL_BIOS, STAC_927X_MODELS @@ -2154,6 +2155,13 @@ static unsigned int d965_5st_pin_configs[14] = { 0x40000100, 0x40000100 }; +static unsigned int d965_5st_no_fp_pin_configs[14] = { + 0x40000100, 0x40000100, 0x0181304e, 0x01014010, + 0x01a19040, 0x01011012, 0x01016011, 0x40000100, + 0x40000100, 0x40000100, 0x40000100, 0x01442070, + 0x40000100, 0x40000100 +}; + static unsigned int dell_3st_pin_configs[14] = { 0x02211230, 0x02a11220, 0x01a19040, 0x01114210, 0x01111212, 0x01116211, 0x01813050, 0x01112214, @@ -2166,6 +2174,7 @@ static unsigned int *stac927x_brd_tbl[STAC_927X_MODELS] = { [STAC_D965_REF] = ref927x_pin_configs, [STAC_D965_3ST] = d965_3st_pin_configs, [STAC_D965_5ST] = d965_5st_pin_configs, + [STAC_D965_5ST_NO_FP] = d965_5st_no_fp_pin_configs, [STAC_DELL_3ST] = dell_3st_pin_configs, [STAC_DELL_BIOS] = NULL, }; @@ -2176,6 +2185,7 @@ static const char *stac927x_models[STAC_927X_MODELS] = { [STAC_D965_REF] = "ref", [STAC_D965_3ST] = "3stack", [STAC_D965_5ST] = "5stack", + [STAC_D965_5ST_NO_FP] = "5stack-no-fp", [STAC_DELL_3ST] = "dell-3stack", [STAC_DELL_BIOS] = "dell-bios", }; From 657cafa6b0f5296424d6f43f6f6eeb4a3222117e Mon Sep 17 00:00:00 2001 From: Alex Riesen Date: Sun, 24 May 2009 15:30:48 +0200 Subject: [PATCH 42/91] Use a format for linux_banner There is no format specifiers left in the linux_banner, and gcc-4.3 complains seeing the printk. Signed-off-by: Alex Riesen Signed-off-by: Linus Torvalds --- init/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index 3bbf93be744c..d721dad05dd7 100644 --- a/init/main.c +++ b/init/main.c @@ -566,8 +566,7 @@ asmlinkage void __init start_kernel(void) tick_init(); boot_cpu_init(); page_address_init(); - printk(KERN_NOTICE); - printk(linux_banner); + printk(KERN_NOTICE "%s", linux_banner); setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); setup_command_line(command_line); From 32bdfac5462d777f35b00838893c4f87baf23efe Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 24 May 2009 21:15:07 +0200 Subject: [PATCH 43/91] PM: Do not hold dpm_list_mtx while disabling/enabling nonboot CPUs We shouldn't hold dpm_list_mtx while executing [disable|enable]_nonboot_cpus(), because theoretically this may lead to a deadlock as shown by the following example (provided by Johannes Berg): CPU 3 CPU 2 CPU 1 suspend/hibernate something: rtnl_lock() device_pm_lock() -> mutex_lock(&dpm_list_mtx) mutex_lock(&dpm_list_mtx) linkwatch_work -> rtnl_lock() disable_nonboot_cpus() -> flush CPU 3 workqueue Fortunately, device drivers are supposed to stop any activities that might lead to the registration of new device objects way before disable_nonboot_cpus() is called, so it shouldn't be necessary to hold dpm_list_mtx over the entire late part of device suspend and early part of device resume. Thus, during the late suspend and the early resume of devices acquire dpm_list_mtx only when dpm_list is going to be traversed and release it right after that. This patch is reported to fix the regressions tracked as http://bugzilla.kernel.org/show_bug.cgi?id=13245. Signed-off-by: Rafael J. Wysocki Acked-by: Alan Stern Reported-by: Miles Lane Tested-by: Ming Lei --- drivers/base/power/main.c | 4 ++++ kernel/kexec.c | 2 -- kernel/power/disk.c | 21 +++------------------ kernel/power/main.c | 7 +------ 4 files changed, 8 insertions(+), 26 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 69b4ddb7de3b..3e4bc699bc0f 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -357,6 +357,7 @@ static void dpm_power_up(pm_message_t state) { struct device *dev; + mutex_lock(&dpm_list_mtx); list_for_each_entry(dev, &dpm_list, power.entry) if (dev->power.status > DPM_OFF) { int error; @@ -366,6 +367,7 @@ static void dpm_power_up(pm_message_t state) if (error) pm_dev_err(dev, state, " early", error); } + mutex_unlock(&dpm_list_mtx); } /** @@ -614,6 +616,7 @@ int device_power_down(pm_message_t state) int error = 0; suspend_device_irqs(); + mutex_lock(&dpm_list_mtx); list_for_each_entry_reverse(dev, &dpm_list, power.entry) { error = suspend_device_noirq(dev, state); if (error) { @@ -622,6 +625,7 @@ int device_power_down(pm_message_t state) } dev->power.status = DPM_OFF_IRQ; } + mutex_unlock(&dpm_list_mtx); if (error) device_power_up(resume_event(state)); return error; diff --git a/kernel/kexec.c b/kernel/kexec.c index 5a758c6e4950..e4983770913b 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1451,7 +1451,6 @@ int kernel_kexec(void) error = device_suspend(PMSG_FREEZE); if (error) goto Resume_console; - device_pm_lock(); /* At this point, device_suspend() has been called, * but *not* device_power_down(). We *must* * device_power_down() now. Otherwise, drivers for @@ -1489,7 +1488,6 @@ int kernel_kexec(void) enable_nonboot_cpus(); device_power_up(PMSG_RESTORE); Resume_devices: - device_pm_unlock(); device_resume(PMSG_RESTORE); Resume_console: resume_console(); diff --git a/kernel/power/disk.c b/kernel/power/disk.c index b0dc9e7a0d17..5cb080e7eebd 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -215,8 +215,6 @@ static int create_image(int platform_mode) if (error) return error; - device_pm_lock(); - /* At this point, device_suspend() has been called, but *not* * device_power_down(). We *must* call device_power_down() now. * Otherwise, drivers for some devices (e.g. interrupt controllers) @@ -227,7 +225,7 @@ static int create_image(int platform_mode) if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting hibernation\n"); - goto Unlock; + return error; } error = platform_pre_snapshot(platform_mode); @@ -280,9 +278,6 @@ static int create_image(int platform_mode) device_power_up(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); - Unlock: - device_pm_unlock(); - return error; } @@ -344,13 +339,11 @@ static int resume_target_kernel(bool platform_mode) { int error; - device_pm_lock(); - error = device_power_down(PMSG_QUIESCE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting resume\n"); - goto Unlock; + return error; } error = platform_pre_restore(platform_mode); @@ -403,9 +396,6 @@ static int resume_target_kernel(bool platform_mode) device_power_up(PMSG_RECOVER); - Unlock: - device_pm_unlock(); - return error; } @@ -464,11 +454,9 @@ int hibernation_platform_enter(void) goto Resume_devices; } - device_pm_lock(); - error = device_power_down(PMSG_HIBERNATE); if (error) - goto Unlock; + goto Resume_devices; error = hibernation_ops->prepare(); if (error) @@ -493,9 +481,6 @@ int hibernation_platform_enter(void) device_power_up(PMSG_RESTORE); - Unlock: - device_pm_unlock(); - Resume_devices: entering_platform_hibernation = false; device_resume(PMSG_RESTORE); diff --git a/kernel/power/main.c b/kernel/power/main.c index f99ed6a75eac..868028280d13 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -289,12 +289,10 @@ static int suspend_enter(suspend_state_t state) { int error; - device_pm_lock(); - if (suspend_ops->prepare) { error = suspend_ops->prepare(); if (error) - goto Done; + return error; } error = device_power_down(PMSG_SUSPEND); @@ -343,9 +341,6 @@ static int suspend_enter(suspend_state_t state) if (suspend_ops->finish) suspend_ops->finish(); - Done: - device_pm_unlock(); - return error; } From d5a877e8dd409d8c702986d06485c374b705d340 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 24 May 2009 13:03:43 -0700 Subject: [PATCH 44/91] async: make sure independent async domains can't accidentally entangle The problem occurs when async_synchronize_full_domain() is called when the async_pending list is not empty. This will cause lowest_running() to return the cookie of the first entry on the async_pending list, which might be nothing at all to do with the domain being asked for and thus cause the domain synchronization to wait for an unrelated domain. This can cause a deadlock if domain synchronization is used from one domain to wait for another. Fix by running over the async_pending list to see if any pending items actually belong to our domain (and return their cookies if they do). Signed-off-by: James Bottomley Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- kernel/async.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/async.c b/kernel/async.c index 968ef9457d4e..50540301ed0f 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -92,19 +92,23 @@ extern int initcall_debug; static async_cookie_t __lowest_in_progress(struct list_head *running) { struct async_entry *entry; + async_cookie_t ret = next_cookie; /* begin with "infinity" value */ + if (!list_empty(running)) { entry = list_first_entry(running, struct async_entry, list); - return entry->cookie; - } else if (!list_empty(&async_pending)) { - entry = list_first_entry(&async_pending, - struct async_entry, list); - return entry->cookie; - } else { - /* nothing in progress... next_cookie is "infinity" */ - return next_cookie; + ret = entry->cookie; } + if (!list_empty(&async_pending)) { + list_for_each_entry(entry, &async_pending, list) + if (entry->running == running) { + ret = entry->cookie; + break; + } + } + + return ret; } static async_cookie_t lowest_in_progress(struct list_head *running) From 71c9d8b68b299bef614afc7907393564a9f1476f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 May 2009 12:01:59 +0900 Subject: [PATCH 45/91] x86: Remove remap percpu allocator for the time being Remap percpu allocator has subtle bug when combined with page attribute changing. Remap percpu allocator aliases PMD pages for the first chunk and as pageattr doesn't know about the alias it ends up updating page attributes of the original mapping thus leaving the alises in inconsistent state which might lead to subtle data corruption. Please read the following threads for more information: http://thread.gmane.org/gmane.linux.kernel/835783 The following is the proposed fix which teaches pageattr about percpu aliases. http://thread.gmane.org/gmane.linux.kernel/837157 However, the above changes are deemed too pervasive for upstream inclusion for 2.6.30 release, so this patch essentially disables the remap allocator for the time being. Signed-off-by: Tejun Heo LKML-Reference: <4A1A0A27.4050301@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 3a97a4cf1872..8f0e13be36b3 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -160,8 +160,10 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) /* * If large page isn't supported, there's no benefit in doing * this. Also, on non-NUMA, embedding is better. + * + * NOTE: disabled for now. */ - if (!cpu_has_pse || !pcpu_need_numa()) + if (true || !cpu_has_pse || !pcpu_need_numa()) return -EINVAL; /* From 461c6c3a0a23a8fac1a4b636e93ff5dfe599a241 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 25 May 2009 08:06:02 +0200 Subject: [PATCH 46/91] ALSA: hda - Add missing check of pin vref 50 and others in Realtek codecs Some Realtek codecs like ALC861 seem to support only VREF50 while the current driver assumes it's only VREF80. Check other VREF bits to set the correct value. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bcbb736f94f0..0fd258eba3a5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -776,6 +776,12 @@ static void alc_set_input_pin(struct hda_codec *codec, hda_nid_t nid, pincap = (pincap & AC_PINCAP_VREF) >> AC_PINCAP_VREF_SHIFT; if (pincap & AC_PINCAP_VREF_80) val = PIN_VREF80; + else if (pincap & AC_PINCAP_VREF_50) + val = PIN_VREF50; + else if (pincap & AC_PINCAP_VREF_100) + val = PIN_VREF100; + else if (pincap & AC_PINCAP_VREF_GRD) + val = PIN_VREFGRD; } snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, val); } From 4e2fd555199977c5994d1a4d2d3b8761b20ca4c7 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 25 May 2009 00:42:34 -0700 Subject: [PATCH 47/91] gianfar: fix BUG under load after introduction of skb recycling Since commit 0fd56bb5be6455d0d42241e65aed057244665e5e ("gianfar: Add support for skb recycling"), gianfar puts skbuffs that are in the rx ring back onto the recycle list as-is in case there was a receive error, but this breaks the following invariant: that all skbuffs on the recycle list have skb->data = skb->head + NET_SKB_PAD. The RXBUF_ALIGNMENT realignment done in gfar_new_skb() will be done twice on skbuffs recycled in this way, causing there not to be enough room in the skb anymore to receive a full packet, eventually leading to an skb_over_panic from gfar_clean_rx_ring() -> skb_put(). Resetting the skb->data pointer to skb->head + NET_SKB_PAD before putting the skb back onto the recycle list restores the mentioned invariant, and should fix this issue. Reported-by: Michael Guntsche Tested-by: Michael Guntsche Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- drivers/net/gianfar.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index b2c49679bba7..a0519184e54e 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -1885,8 +1885,17 @@ int gfar_clean_rx_ring(struct net_device *dev, int rx_work_limit) if (unlikely(!newskb)) newskb = skb; - else if (skb) + else if (skb) { + /* + * We need to reset ->data to what it + * was before gfar_new_skb() re-aligned + * it to an RXBUF_ALIGNMENT boundary + * before we put the skb back on the + * recycle list. + */ + skb->data = skb->head + NET_SKB_PAD; __skb_queue_head(&priv->rx_recycle, skb); + } } else { /* Increment the number of packets */ dev->stats.rx_packets++; From a8cd0244e9cebcf9b358d24c7e7410062f3665cb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 24 May 2009 22:15:25 +0300 Subject: [PATCH 48/91] KVM: Make paravirt tlb flush also reload the PAE PDPTRs The paravirt tlb flush may be used not only to flush TLBs, but also to reload the four page-directory-pointer-table entries, as it is used as a replacement for reloading CR3. Change the code to do the entire CR3 reloading dance instead of simply flushing the TLB. Cc: stable@kernel.org Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b6caf1329b1b..32cf11e5728a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2897,8 +2897,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) { - kvm_x86_ops->tlb_flush(vcpu); - set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); + kvm_set_cr3(vcpu, vcpu->arch.cr3); return 1; } From a2edf57f510cce6a389cc14e58c6ad0a4296d6f9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 24 May 2009 22:19:00 +0300 Subject: [PATCH 49/91] KVM: Fix PDPTR reloading on CR4 writes The processor is documented to reload the PDPTRs while in PAE mode if any of the CR4 bits PSE, PGE, or PAE change. Linux relies on this behaviour when zapping the low mappings of PAE kernels during boot. The code already handled changes to CR4.PAE; augment it to also notice changes to PSE and PGE. This triggered while booting an F11 PAE kernel; the futex initialization code runs before any CR3 reloads and writes to a NULL pointer; the futex subsystem ended up uninitialized, killing PI futexes and pulseaudio which uses them. Cc: stable@kernel.org Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 49079a46687b..3944e917e794 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -338,6 +338,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw); void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { + unsigned long old_cr4 = vcpu->arch.cr4; + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; + if (cr4 & CR4_RESERVED_BITS) { printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); @@ -351,7 +354,8 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) kvm_inject_gp(vcpu, 0); return; } - } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE) + } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) + && ((cr4 ^ old_cr4) & pdptr_bits) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); kvm_inject_gp(vcpu, 0); From 62e1e389f87a8839ad83b08c44691d1df8320846 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 May 2009 09:40:59 +1000 Subject: [PATCH 50/91] md: always update level / chunk_size / layout when writing v1.x metadata. We previously didn't update these fields when writing the metadata because they could never change. They can now, so we better write them. v0.90 metadata always updated these fields. Signed-off-by: NeilBrown --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index fccc8343a250..aa79d55875f0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1375,6 +1375,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->raid_disks = cpu_to_le32(mddev->raid_disks); sb->size = cpu_to_le64(mddev->dev_sectors); + sb->chunksize = cpu_to_le32(mddev->chunk_size >> 9); + sb->level = cpu_to_le32(mddev->level); + sb->layout = cpu_to_le32(mddev->layout); if (mddev->bitmap && mddev->bitmap_file == NULL) { sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); From 2b69c83924396ad1eda36fdd267c9d2f360f5555 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 May 2009 09:41:17 +1000 Subject: [PATCH 51/91] md: improve errno return when setting array_size Instead of always returns EINVAL if anything goes wrong when setting the array size, add the option of E2BIG if the size requested is too large. This makes it easier for user-space to be sure what went wrong. Signed-off-by: NeilBrown --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index aa79d55875f0..58e0b02a74c2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3683,7 +3683,7 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) if (strict_blocks_to_sectors(buf, §ors) < 0) return -EINVAL; if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors) - return -EINVAL; + return -E2BIG; mddev->external_size = 1; } From be512691036cc989c11d0f418187efbbf14468e6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 May 2009 09:41:17 +1000 Subject: [PATCH 52/91] md: bitmap: improve bitmap maintenance code. The code for checking which bits in the bitmap can be cleared has 2 problems: 1/ it repeatedly takes and drops a spinlock, where it would make more sense to just hold on to it most of the time. 2/ it doesn't make use of some opportunities to skip large sections of the bitmap This patch fixes those. It will only affect CPU consumption, not correctness. Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 47c68bc75a17..56df1cee8fb3 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1097,14 +1097,12 @@ void bitmap_daemon_work(struct bitmap *bitmap) } bitmap->allclean = 1; + spin_lock_irqsave(&bitmap->lock, flags); for (j = 0; j < bitmap->chunks; j++) { bitmap_counter_t *bmc; - spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->filemap) { + if (!bitmap->filemap) /* error or shutdown */ - spin_unlock_irqrestore(&bitmap->lock, flags); break; - } page = filemap_get_page(bitmap, j); @@ -1121,6 +1119,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) write_page(bitmap, page, 0); bitmap->allclean = 0; } + spin_lock_irqsave(&bitmap->lock, flags); + j |= (PAGE_BITS - 1); continue; } @@ -1181,9 +1181,10 @@ void bitmap_daemon_work(struct bitmap *bitmap) ext2_clear_bit(file_page_offset(j), paddr); kunmap_atomic(paddr, KM_USER0); } - } - spin_unlock_irqrestore(&bitmap->lock, flags); + } else + j |= PAGE_COUNTER_MASK; } + spin_unlock_irqrestore(&bitmap->lock, flags); /* now sync the final page */ if (lastpage != NULL) { From b6a9ce688f613e2ee5f15e6720e0bb8520efc36e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 May 2009 09:41:17 +1000 Subject: [PATCH 53/91] md: export 'frozen' resync state through sysfs The md resync engine has a 'frozen' state which ensures that no resync/recovery. This is used to avoid races. Export this state through the 'sync_action' sysfs attribute so that user-space can benefit and also avoid some races. Signed-off-by: NeilBrown --- drivers/md/md.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 58e0b02a74c2..384e4f0904c8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3306,7 +3306,9 @@ static ssize_t action_show(mddev_t *mddev, char *page) { char *type = "idle"; - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) + type = "frozen"; + else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) { if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) type = "reshape"; @@ -3329,7 +3331,12 @@ action_store(mddev_t *mddev, const char *page, size_t len) if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; - if (cmd_match(page, "idle")) { + if (cmd_match(page, "frozen")) + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + else + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + + if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_unregister_thread(mddev->sync_thread); From 848b3182365fdf5a05bcd5ed949071cac2c894b3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 May 2009 12:41:08 +1000 Subject: [PATCH 54/91] md: raid5: avoid sector values going negative when testing reshape progress. As sector_t in unsigned, we cannot afford to let 'safepos' etc go negative. So replace a -= b; by a -= min(b,a); Signed-off-by: NeilBrown --- drivers/md/raid5.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4616bc3a6e71..3c3626d2a1f9 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3811,13 +3811,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped safepos = conf->reshape_safe; sector_div(safepos, data_disks); if (mddev->delta_disks < 0) { - writepos -= reshape_sectors; + writepos -= min(reshape_sectors, writepos); readpos += reshape_sectors; safepos += reshape_sectors; } else { writepos += reshape_sectors; - readpos -= reshape_sectors; - safepos -= reshape_sectors; + readpos -= min(reshape_sectors, readpos); + safepos -= min(reshape_sectors, safepos); } /* 'writepos' is the most advanced device address we might write. From 7a91ee1f628ef6bfe3f13067c0ddf9db520cb86b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 May 2009 12:57:21 +1000 Subject: [PATCH 55/91] md: don't update curr_resync_completed without also updating reshape_position. In order for the metadata to always be consistent, we mustn't updated curr_resync_completed without also updating reshape_position. The reshape code updates both at the same time. However since commit 97e4f42d62badb0f9fbc27c013e89bc1336a03bc the common md_do_sync will sometimes update curr_resync_completed but is not in a position to update reshape_position. So if MD_RECOVERY_RESHAPE is set (indicating that a reshape is happening, so reshape_position might change), don't update curr_resync_completed in md_do_sync, leave it to the per-personality reshape code. Signed-off-by: NeilBrown --- drivers/md/md.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 384e4f0904c8..954456532acc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6362,12 +6362,13 @@ void md_do_sync(mddev_t *mddev) skipped = 0; - if ((mddev->curr_resync > mddev->curr_resync_completed && - (mddev->curr_resync - mddev->curr_resync_completed) - > (max_sectors >> 4)) || - (j - mddev->curr_resync_completed)*2 - >= mddev->resync_max - mddev->curr_resync_completed - ) { + if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && + ((mddev->curr_resync > mddev->curr_resync_completed && + (mddev->curr_resync - mddev->curr_resync_completed) + > (max_sectors >> 4)) || + (j - mddev->curr_resync_completed)*2 + >= mddev->resync_max - mddev->curr_resync_completed + )) { /* time to update curr_resync_completed */ blk_unplug(mddev->queue); wait_event(mddev->recovery_wait, From b492b852cd8c99505708152c29a5e09a787af9de Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 May 2009 12:57:36 +1000 Subject: [PATCH 56/91] md: don't use locked_ioctl. md has no need for the BKL - it does its own locking. So md_ioctl doesn't need to be a locked_ioctl. Signed-off-by: NeilBrown --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 954456532acc..641b211fe3fe 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5567,7 +5567,7 @@ static struct block_device_operations md_fops = .owner = THIS_MODULE, .open = md_open, .release = md_release, - .locked_ioctl = md_ioctl, + .ioctl = md_ioctl, .getgeo = md_getgeo, .media_changed = md_media_changed, .revalidate_disk= md_revalidate, From 8e35961b57da14cb64cb0e4e1b7e3aabda6396fe Mon Sep 17 00:00:00 2001 From: Hideo Saito Date: Sun, 24 May 2009 15:33:34 +0000 Subject: [PATCH 57/91] powerpc/mm: Fix broken MMU PID stealing on !SMP The recent rework of the MMU PID handling for non-hash CPUs has a subtle bug in the !SMP "optimized" variant of the PID stealing function. It clears the PID in the mm context before it calls local_flush_tlb_mm(). However, the later will not flush anything if the PID in the context is clear... Signed-off-by: Hideo Saito Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/mmu_context_nohash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index a70e311bd457..030d0005b4d2 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -127,12 +127,12 @@ static unsigned int steal_context_up(unsigned int id) pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm); - /* Mark this mm has having no context anymore */ - mm->context.id = MMU_NO_CONTEXT; - /* Flush the TLB for that context */ local_flush_tlb_mm(mm); + /* Mark this mm has having no context anymore */ + mm->context.id = MMU_NO_CONTEXT; + /* XXX This clear should ultimately be part of local_flush_tlb_mm */ __clear_bit(id, stale_map[cpu]); From 217cbfa856dc1cbc2890781626c4032d9e3ec59f Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 25 May 2009 22:43:49 -0700 Subject: [PATCH 58/91] mac8390: fix regression caused during net_device_ops conversion Changeset ca17584bf2ad1b1e37a5c0e4386728cc5fc9dabc ("mac8390: update to net_device_ops") broke mac8390 by adding 8390.o to the link. That meant that lib8390.c was included twice, once in mac8390.c and once in 8390.c, subject to different macros. This patch reverts that by avoiding the wrappers in 8390.c. They seem to be of no value since COMPAT_NET_DEV_OPS is going away soon. Tested with a Kinetics EtherPort card. Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/Makefile | 2 +- drivers/net/mac8390.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 1fc4602a6ff2..a1c25cb4669f 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -102,7 +102,7 @@ obj-$(CONFIG_HAMACHI) += hamachi.o obj-$(CONFIG_NET) += Space.o loopback.o obj-$(CONFIG_SEEQ8005) += seeq8005.o obj-$(CONFIG_NET_SB1000) += sb1000.o -obj-$(CONFIG_MAC8390) += mac8390.o 8390.o +obj-$(CONFIG_MAC8390) += mac8390.o obj-$(CONFIG_APNE) += apne.o 8390.o obj-$(CONFIG_PCMCIA_PCNET) += 8390.o obj-$(CONFIG_HP100) += hp100.o diff --git a/drivers/net/mac8390.c b/drivers/net/mac8390.c index 8e884869a05b..f26667d5eaae 100644 --- a/drivers/net/mac8390.c +++ b/drivers/net/mac8390.c @@ -304,7 +304,7 @@ struct net_device * __init mac8390_probe(int unit) if (!MACH_IS_MAC) return ERR_PTR(-ENODEV); - dev = alloc_ei_netdev(); + dev = ____alloc_ei_netdev(0); if (!dev) return ERR_PTR(-ENOMEM); @@ -481,10 +481,10 @@ void cleanup_module(void) static const struct net_device_ops mac8390_netdev_ops = { .ndo_open = mac8390_open, .ndo_stop = mac8390_close, - .ndo_start_xmit = ei_start_xmit, - .ndo_tx_timeout = ei_tx_timeout, - .ndo_get_stats = ei_get_stats, - .ndo_set_multicast_list = ei_set_multicast_list, + .ndo_start_xmit = __ei_start_xmit, + .ndo_tx_timeout = __ei_tx_timeout, + .ndo_get_stats = __ei_get_stats, + .ndo_set_multicast_list = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = eth_change_mtu, From c80a5cdfc5ca6533cb893154f546370da1fdb8f0 Mon Sep 17 00:00:00 2001 From: Doug Leith Date: Mon, 25 May 2009 22:44:59 -0700 Subject: [PATCH 59/91] tcp: tcp_vegas ssthresh bugfix This patch fixes ssthresh accounting issues in tcp_vegas when cwnd decreases Signed-off-by: Doug Leith Signed-off-by: David S. Miller --- net/ipv4/tcp_vegas.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a453aac91bd3..c6743eec9b7d 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -158,6 +158,11 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) } EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); +static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) +{ + return min(tp->snd_ssthresh, tp->snd_cwnd-1); +} + static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); @@ -221,11 +226,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; - if (diff > gamma && tp->snd_ssthresh > 2 ) { + if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) { /* Going too fast. Time to slow down * and switch to congestion avoidance. */ - tp->snd_ssthresh = 2; /* Set cwnd to match the actual rate * exactly: @@ -235,6 +239,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * utilization. */ tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); + tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ @@ -250,6 +255,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * we slow down. */ tp->snd_cwnd--; + tp->snd_ssthresh + = tcp_vegas_ssthresh(tp); } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. From 46176b4f6bac19454b7b5c35f68594b85850a600 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 26 May 2009 14:42:40 +0900 Subject: [PATCH 60/91] x86, relocs: ignore R_386_NONE in kernel relocation entries For relocatable 32bit kernels, boot/compressed/relocs.c processes relocation entries in the kernel image and appends it to the kernel image such that boot/compressed/head_32.S can relocate the kernel. The kernel image is one statically linked object and only uses two relocation types - R_386_PC32 and R_386_32, of the two only the latter needs massaging during kernel relocation and thus handled by relocs. R_386_PC32 is ignored and all other relocation types are considered error. When the target of a relocation resides in a discarded section, binutils doesn't throw away the relocation record but nullifies it by changing it to R_386_NONE, which unfortunately makes relocs fail. The problem was triggered by yet out-of-tree x86 stack unwind patches but given the binutils behavior, ignoring R_386_NONE is the right thing to do. The problem has been tracked down to binutils behavior by Jan Beulich. [ Impact: fix build with certain binutils by ignoring R_386_NONE ] Signed-off-by: Tejun Heo Cc: Jan Beulich Cc: Ingo Molnar LKML-Reference: <4A1B8150.40702@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/relocs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index 857e492c571e..bbeb0c3fbd90 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -504,8 +504,11 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) if (sym->st_shndx == SHN_ABS) { continue; } - if (r_type == R_386_PC32) { - /* PC relative relocations don't need to be adjusted */ + if (r_type == R_386_NONE || r_type == R_386_PC32) { + /* + * NONE can be ignored and and PC relative + * relocations don't need to be adjusted. + */ } else if (r_type == R_386_32) { /* Visit relocations that need to be adjusted */ From f11a377b3f4e897d11f0e8d1fc688667e2f19708 Mon Sep 17 00:00:00 2001 From: David Dillow Date: Fri, 22 May 2009 15:29:34 +0000 Subject: [PATCH 61/91] r8169: avoid losing MSI interrupts The 8169 chip only generates MSI interrupts when all enabled event sources are quiescent and one or more sources transition to active. If not all of the active events are acknowledged, or a new event becomes active while the existing ones are cleared in the handler, we will not see a new interrupt. The current interrupt handler masks off the Rx and Tx events once the NAPI handler has been scheduled, which opens a race window in which we can get another Rx or Tx event and never ACK'ing it, stopping all activity until the link is reset (ifconfig down/up). Fix this by always ACK'ing all event sources, and loop in the handler until we have all sources quiescent. Signed-off-by: David Dillow Tested-by: Michael Buesch Signed-off-by: David S. Miller --- drivers/net/r8169.c | 112 ++++++++++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 50 deletions(-) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 0b6e8c896835..8247a945a1d9 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -3554,54 +3554,64 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) int handled = 0; int status; + /* loop handling interrupts until we have no new ones or + * we hit a invalid/hotplug case. + */ status = RTL_R16(IntrStatus); + while (status && status != 0xffff) { + handled = 1; - /* hotplug/major error/no more work/shared irq */ - if ((status == 0xffff) || !status) - goto out; - - handled = 1; - - if (unlikely(!netif_running(dev))) { - rtl8169_asic_down(ioaddr); - goto out; - } - - status &= tp->intr_mask; - RTL_W16(IntrStatus, - (status & RxFIFOOver) ? (status | RxOverflow) : status); - - if (!(status & tp->intr_event)) - goto out; - - /* Work around for rx fifo overflow */ - if (unlikely(status & RxFIFOOver) && - (tp->mac_version == RTL_GIGA_MAC_VER_11)) { - netif_stop_queue(dev); - rtl8169_tx_timeout(dev); - goto out; - } - - if (unlikely(status & SYSErr)) { - rtl8169_pcierr_interrupt(dev); - goto out; - } - - if (status & LinkChg) - rtl8169_check_link_status(dev, tp, ioaddr); - - if (status & tp->napi_event) { - RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event); - tp->intr_mask = ~tp->napi_event; - - if (likely(napi_schedule_prep(&tp->napi))) - __napi_schedule(&tp->napi); - else if (netif_msg_intr(tp)) { - printk(KERN_INFO "%s: interrupt %04x in poll\n", - dev->name, status); + /* Handle all of the error cases first. These will reset + * the chip, so just exit the loop. + */ + if (unlikely(!netif_running(dev))) { + rtl8169_asic_down(ioaddr); + break; } + + /* Work around for rx fifo overflow */ + if (unlikely(status & RxFIFOOver) && + (tp->mac_version == RTL_GIGA_MAC_VER_11)) { + netif_stop_queue(dev); + rtl8169_tx_timeout(dev); + break; + } + + if (unlikely(status & SYSErr)) { + rtl8169_pcierr_interrupt(dev); + break; + } + + if (status & LinkChg) + rtl8169_check_link_status(dev, tp, ioaddr); + + /* We need to see the lastest version of tp->intr_mask to + * avoid ignoring an MSI interrupt and having to wait for + * another event which may never come. + */ + smp_rmb(); + if (status & tp->intr_mask & tp->napi_event) { + RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event); + tp->intr_mask = ~tp->napi_event; + + if (likely(napi_schedule_prep(&tp->napi))) + __napi_schedule(&tp->napi); + else if (netif_msg_intr(tp)) { + printk(KERN_INFO "%s: interrupt %04x in poll\n", + dev->name, status); + } + } + + /* We only get a new MSI interrupt when all active irq + * sources on the chip have been acknowledged. So, ack + * everything we've seen and check if new sources have become + * active to avoid blocking all interrupts from the chip. + */ + RTL_W16(IntrStatus, + (status & RxFIFOOver) ? (status | RxOverflow) : status); + status = RTL_R16(IntrStatus); } -out: + return IRQ_RETVAL(handled); } @@ -3617,13 +3627,15 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete(napi); - tp->intr_mask = 0xffff; - /* - * 20040426: the barrier is not strictly required but the - * behavior of the irq handler could be less predictable - * without it. Btw, the lack of flush for the posted pci - * write is safe - FR + + /* We need for force the visibility of tp->intr_mask + * for other CPUs, as we can loose an MSI interrupt + * and potentially wait for a retransmit timeout if we don't. + * The posted write to IntrMask is safe, as it will + * eventually make it to the chip and we won't loose anything + * until it does. */ + tp->intr_mask = 0xffff; smp_wmb(); RTL_W16(IntrMask, tp->intr_event); } From 4319503779060120fa5de9b8fde056603bb6e0fd Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 6 Mar 2009 20:24:57 +0000 Subject: [PATCH 62/91] [CPUFREQ] add atom family to p4-clockmod Some atom procs don't do freq scaling (such as the atom 330 on my own littlefalls2 board). By adding the atom family here, we at least get the benefit of passive cooling in a thermal emergency. Not sure how to see that its actually helping any, but the driver does bind and claim its functioning on my atom 330. Signed-off-by: Jarod Wilson Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 6ac55bd341ae..869615193720 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -168,6 +168,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) case 0x0E: /* Core */ case 0x0F: /* Core Duo */ case 0x16: /* Celeron Core */ + case 0x1C: /* Atom */ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE); case 0x0D: /* Pentium M (Dothan) */ From d38e73e8dad454a5916f446b0d3523c1161ae95a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 23 Apr 2009 13:36:12 -0400 Subject: [PATCH 63/91] [CPUFREQ] powernow-k7 build fix when ACPI=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/x86/kernel/cpu/cpufreq/powernow-k7.c:172: warning: 'invalidate_entry' defined but not used Reported-by: Toralf Förster Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 3c28ccd49742..a8363e5be4ef 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -168,10 +168,12 @@ static int check_powernow(void) return 1; } +#ifdef CONFIG_X86_POWERNOW_K7_ACPI static void invalidate_entry(unsigned int entry) { powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; } +#endif static int get_ranges(unsigned char *pst) { From 42a06f2166f2f6f7bf04f32b4e823eacdceafdc9 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sun, 17 May 2009 10:23:52 -0400 Subject: [PATCH 64/91] [CPUFREQ] remove rwsem lock from CPUFREQ_GOV_STOP call * Rafael J. Wysocki (rjw@sisk.pl) wrote: > This message has been generated automatically as a part of a report > of regressions introduced between 2.6.28 and 2.6.29. > > The following bug entry is on the current list of known regressions > introduced between 2.6.28 and 2.6.29. Please verify if it still should > be listed and let me know (either way). > > > Bug-Entry : http://bugzilla.kernel.org/show_bug.cgi?id=13186 > Subject : cpufreq timer teardown problem > Submitter : Mathieu Desnoyers > Date : 2009-04-23 14:00 (24 days old) > References : http://marc.info/?l=linux-kernel&m=124049523515036&w=4 > Handled-By : Mathieu Desnoyers > Patch : http://patchwork.kernel.org/patch/19754/ > http://patchwork.kernel.org/patch/19753/ The patches linked above depend on the following patch to remove circular locking dependency : cpufreq: remove rwsem lock from CPUFREQ_GOV_STOP call (the following issue was faced when using cancel_delayed_work_sync() in the timer teardown (which fixes a race). * KOSAKI Motohiro (kosaki.motohiro@jp.fujitsu.com) wrote: > Hi > > my box output following warnings. > it seems regression by commit 7ccc7608b836e58fbacf65ee4f8eefa288e86fac. > > A: work -> do_dbs_timer() -> cpu_policy_rwsem > B: store() -> cpu_policy_rwsem -> cpufreq_governor_dbs() -> work > > Hrm, I think it must be due to my attempt to fix the timer teardown race in ondemand governor mixed with new locking behavior in 2.6.30-rc. The rwlock seems to be taken around the whole call to cpufreq_governor_dbs(), when it should be only taken around accesses to the locked data, and especially *not* around the call to dbs_timer_exit(). Reverting my fix attempt would put the teardown race back in place (replacing the cancel_delayed_work_sync by cancel_delayed_work). Instead, a proper fix would imply modifying this critical section : cpufreq.c: __cpufreq_remove_dev() ... if (cpufreq_driver->target) __cpufreq_governor(data, CPUFREQ_GOV_STOP); unlock_policy_rwsem_write(cpu); To make sure the __cpufreq_governor() callback is not called with rwsem held. This would allow execution of cancel_delayed_work_sync() without being nested within the rwsem. Applies on top of the 2.6.30-rc5 tree. Required to remove circular dep in teardown of both conservative and ondemande governors so they can use cancel_delayed_work_sync(). CPUFREQ_GOV_STOP does not modify the policy, therefore this locking seemed unneeded. Signed-off-by: Mathieu Desnoyers CC: KOSAKI Motohiro Cc: Greg KH CC: Ingo Molnar CC: "Rafael J. Wysocki" CC: Ben Slusky CC: Chris Wright CC: Andrew Morton Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d270e8eb3e67..47d2ad0ae079 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1070,11 +1070,11 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) spin_unlock_irqrestore(&cpufreq_driver_lock, flags); #endif + unlock_policy_rwsem_write(cpu); + if (cpufreq_driver->target) __cpufreq_governor(data, CPUFREQ_GOV_STOP); - unlock_policy_rwsem_write(cpu); - kobject_put(&data->kobj); /* we need to make sure that the underlying kobj is actually From b253d2b2d28ead6fed012feb54694b3d0562839a Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sun, 17 May 2009 10:29:33 -0400 Subject: [PATCH 65/91] [CPUFREQ] fix timer teardown in conservative governor * Rafael J. Wysocki (rjw@sisk.pl) wrote: > This message has been generated automatically as a part of a report > of regressions introduced between 2.6.28 and 2.6.29. > > The following bug entry is on the current list of known regressions > introduced between 2.6.28 and 2.6.29. Please verify if it still should > be listed and let me know (either way). > > > Bug-Entry : http://bugzilla.kernel.org/show_bug.cgi?id=13186 > Subject : cpufreq timer teardown problem > Submitter : Mathieu Desnoyers > Date : 2009-04-23 14:00 (24 days old) > References : http://marc.info/?l=linux-kernel&m=124049523515036&w=4 > Handled-By : Mathieu Desnoyers > Patch : http://patchwork.kernel.org/patch/19754/ > http://patchwork.kernel.org/patch/19753/ > (re-send with updated changelog) cpufreq fix timer teardown in conservative governor The problem is that dbs_timer_exit() uses cancel_delayed_work() when it should use cancel_delayed_work_sync(). cancel_delayed_work() does not wait for the workqueue handler to exit. The ondemand governor does not seem to be affected because the "if (!dbs_info->enable)" check at the beginning of the workqueue handler returns immediately without rescheduling the work. The conservative governor in 2.6.30-rc has the same check as the ondemand governor, which makes things usually run smoothly. However, if the governor is quickly stopped and then started, this could lead to the following race : dbs_enable could be reenabled and multiple do_dbs_timer handlers would run. This is why a synchronized teardown is required. Depends on patch cpufreq: remove rwsem lock from CPUFREQ_GOV_STOP call The following patch applies to 2.6.30-rc2. Stable kernels have a similar issue which should also be fixed, but the code changed between 2.6.29 and 2.6.30, so this patch only applies to 2.6.30-rc. Signed-off-by: Mathieu Desnoyers CC: Andrew Morton CC: gregkh@suse.de CC: stable@kernel.org CC: cpufreq@vger.kernel.org CC: Ingo Molnar CC: rjw@sisk.pl CC: Ben Slusky Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_conservative.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 2ecd95e4ab1a..7a74d175287b 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -91,6 +91,9 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */ * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock * is recursive for the same process. -Venki + * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it + * would deadlock with cancel_delayed_work_sync(), which is needed for proper + * raceless workqueue teardown. */ static DEFINE_MUTEX(dbs_mutex); @@ -542,7 +545,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) { dbs_info->enable = 0; - cancel_delayed_work(&dbs_info->work); + cancel_delayed_work_sync(&dbs_info->work); } static int cpufreq_governor_dbs(struct cpufreq_policy *policy, From b14893a62c73af0eca414cfed505b8c09efc613c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sun, 17 May 2009 10:30:45 -0400 Subject: [PATCH 66/91] [CPUFREQ] fix timer teardown in ondemand governor * Rafael J. Wysocki (rjw@sisk.pl) wrote: > This message has been generated automatically as a part of a report > of regressions introduced between 2.6.28 and 2.6.29. > > The following bug entry is on the current list of known regressions > introduced between 2.6.28 and 2.6.29. Please verify if it still should > be listed and let me know (either way). > > > Bug-Entry : http://bugzilla.kernel.org/show_bug.cgi?id=13186 > Subject : cpufreq timer teardown problem > Submitter : Mathieu Desnoyers > Date : 2009-04-23 14:00 (24 days old) > References : http://marc.info/?l=linux-kernel&m=124049523515036&w=4 > Handled-By : Mathieu Desnoyers > Patch : http://patchwork.kernel.org/patch/19754/ > http://patchwork.kernel.org/patch/19753/ > (updated changelog) cpufreq fix timer teardown in ondemand governor The problem is that dbs_timer_exit() uses cancel_delayed_work() when it should use cancel_delayed_work_sync(). cancel_delayed_work() does not wait for the workqueue handler to exit. The ondemand governor does not seem to be affected because the "if (!dbs_info->enable)" check at the beginning of the workqueue handler returns immediately without rescheduling the work. The conservative governor in 2.6.30-rc has the same check as the ondemand governor, which makes things usually run smoothly. However, if the governor is quickly stopped and then started, this could lead to the following race : dbs_enable could be reenabled and multiple do_dbs_timer handlers would run. This is why a synchronized teardown is required. The following patch applies to, at least, 2.6.28.x, 2.6.29.1, 2.6.30-rc2. Depends on patch cpufreq: remove rwsem lock from CPUFREQ_GOV_STOP call Signed-off-by: Mathieu Desnoyers CC: Andrew Morton CC: gregkh@suse.de CC: stable@kernel.org CC: cpufreq@vger.kernel.org CC: Ingo Molnar CC: rjw@sisk.pl CC: Ben Slusky Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 338f428a15b7..e741c339df76 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -98,6 +98,9 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */ * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock * is recursive for the same process. -Venki + * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it + * would deadlock with cancel_delayed_work_sync(), which is needed for proper + * raceless workqueue teardown. */ static DEFINE_MUTEX(dbs_mutex); @@ -562,7 +565,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) { dbs_info->enable = 0; - cancel_delayed_work(&dbs_info->work); + cancel_delayed_work_sync(&dbs_info->work); } static int cpufreq_governor_dbs(struct cpufreq_policy *policy, From df1829770db415dc5a5ed5ada3bd70176c6f0a01 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 22 Apr 2009 13:48:32 +0200 Subject: [PATCH 67/91] [CPUFREQ] powernow-k8 cleanup msg if BIOS does not export ACPI _PSS cpufreq data - Make the message shorter and easier to grep for - Use printk_once instead of WARN_ONCE (functionality of these was mixed) Signed-off-by: Thomas Renninger Cc: Langsdorf, Mark Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 4709ead2db52..feef10c085a1 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1215,13 +1215,16 @@ static int powernowk8_verify(struct cpufreq_policy *pol) return cpufreq_frequency_table_verify(pol, data->powernow_table); } +static const char ACPI_PSS_BIOS_BUG_MSG[] = + KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" + KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; + /* per CPU init entry point to the driver */ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask; int rc; - static int print_once; if (!cpu_online(pol->cpu)) return -ENODEV; @@ -1244,19 +1247,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * an UP version, and is deprecated by AMD. */ if (num_online_cpus() != 1) { - /* - * Replace this one with print_once as soon as such a - * thing gets introduced - */ - if (!print_once) { - WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS " - "does not provide ACPI _PSS objects " - "in a way that Linux understands. " - "Please report this to the Linux ACPI" - " maintainers and complain to your " - "BIOS vendor.\n"); - print_once++; - } + printk_once(ACPI_PSS_BIOS_BUG_MSG); goto err_out; } if (pol->cpu != 0) { From ca446d06351992e4f1a7c1e5e99870ab4ec5188f Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 22 Apr 2009 13:48:33 +0200 Subject: [PATCH 68/91] [CPUFREQ] powernow-k8: determine exact CPU frequency for HW Pstates Slightly modified by trenn@suse.de -> only do this on fam 10h and fam 11h. Currently powernow-k8 determines CPU frequency from ACPI PSS objects, but according to AMD family 11h BKDG this frequency is just a rounded value: "CoreFreq (MHz) = The CPU COF specified by MSRC001_00[6B:64][CpuFid] rounded to the nearest 100 Mhz." As a consequnce powernow-k8 reports wrong CPU frequency on some systems, e.g. on Turion X2 Ultra: powernow-k8: Found 1 AMD Turion(tm)X2 Ultra DualCore Mobile ZM-82 processors (2 cpu cores) (version 2.20.00) powernow-k8: 0 : pstate 0 (2200 MHz) powernow-k8: 1 : pstate 1 (1100 MHz) powernow-k8: 2 : pstate 2 (600 MHz) But this is wrong as frequency for Pstate2 is 550 MHz. x86info reports it correctly: #x86info -a |grep Pstate ... Pstate-0: fid=e, did=0, vid=24 (2200MHz) Pstate-1: fid=e, did=1, vid=30 (1100MHz) Pstate-2: fid=e, did=2, vid=3c (550MHz) (current) Solution is to determine the frequency directly from Pstate MSRs instead of using rounded values from ACPI table. Signed-off-by: Andreas Herrmann Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index feef10c085a1..f6b32d112357 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -649,6 +649,20 @@ static void print_basics(struct powernow_k8_data *data) data->batps); } +static u32 freq_from_fid_did(u32 fid, u32 did) +{ + u32 mhz = 0; + + if (boot_cpu_data.x86 == 0x10) + mhz = (100 * (fid + 0x10)) >> did; + else if (boot_cpu_data.x86 == 0x11) + mhz = (100 * (fid + 8)) >> did; + else + BUG(); + + return mhz * 1000; +} + static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) { @@ -923,8 +937,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, powernow_table[i].index = index; - powernow_table[i].frequency = - data->acpi_data.states[i].core_frequency * 1000; + /* Frequency may be rounded for these */ + if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) { + powernow_table[i].frequency = + freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); + } else + powernow_table[i].frequency = + data->acpi_data.states[i].core_frequency * 1000; } return 0; } From e4a5d54f924ea5ce2913d9d0687d034004816465 Mon Sep 17 00:00:00 2001 From: Ma Ling Date: Tue, 26 May 2009 11:31:00 +0800 Subject: [PATCH 69/91] drm/i915: Add support for VGA load detection (pre-945). Two approaches for VGA detections: hot plug detection for 945G onwards and load pipe detection for Pre-945G. Load pipe detection will get one free pipe, set border color as red and blue, then check CRT status by swf register. This is a sync-up with the 2D driver. Signed-off-by: Ma Ling Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_crt.c | 149 ++++++++++++++++++++++++++++++- 1 file changed, 147 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 19148c3df637..640f5158effc 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -198,9 +198,142 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) return intel_ddc_probe(intel_output); } +static enum drm_connector_status +intel_crt_load_detect(struct drm_crtc *crtc, struct intel_output *intel_output) +{ + struct drm_encoder *encoder = &intel_output->enc; + struct drm_device *dev = encoder->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + uint32_t pipe = intel_crtc->pipe; + uint32_t save_bclrpat; + uint32_t save_vtotal; + uint32_t vtotal, vactive; + uint32_t vsample; + uint32_t vblank, vblank_start, vblank_end; + uint32_t dsl; + uint32_t bclrpat_reg; + uint32_t vtotal_reg; + uint32_t vblank_reg; + uint32_t vsync_reg; + uint32_t pipeconf_reg; + uint32_t pipe_dsl_reg; + uint8_t st00; + enum drm_connector_status status; + + if (pipe == 0) { + bclrpat_reg = BCLRPAT_A; + vtotal_reg = VTOTAL_A; + vblank_reg = VBLANK_A; + vsync_reg = VSYNC_A; + pipeconf_reg = PIPEACONF; + pipe_dsl_reg = PIPEADSL; + } else { + bclrpat_reg = BCLRPAT_B; + vtotal_reg = VTOTAL_B; + vblank_reg = VBLANK_B; + vsync_reg = VSYNC_B; + pipeconf_reg = PIPEBCONF; + pipe_dsl_reg = PIPEBDSL; + } + + save_bclrpat = I915_READ(bclrpat_reg); + save_vtotal = I915_READ(vtotal_reg); + vblank = I915_READ(vblank_reg); + + vtotal = ((save_vtotal >> 16) & 0xfff) + 1; + vactive = (save_vtotal & 0x7ff) + 1; + + vblank_start = (vblank & 0xfff) + 1; + vblank_end = ((vblank >> 16) & 0xfff) + 1; + + /* Set the border color to purple. */ + I915_WRITE(bclrpat_reg, 0x500050); + + if (IS_I9XX(dev)) { + uint32_t pipeconf = I915_READ(pipeconf_reg); + I915_WRITE(pipeconf_reg, pipeconf | PIPECONF_FORCE_BORDER); + /* Wait for next Vblank to substitue + * border color for Color info */ + intel_wait_for_vblank(dev); + st00 = I915_READ8(VGA_MSR_WRITE); + status = ((st00 & (1 << 4)) != 0) ? + connector_status_connected : + connector_status_disconnected; + + I915_WRITE(pipeconf_reg, pipeconf); + } else { + bool restore_vblank = false; + int count, detect; + + /* + * If there isn't any border, add some. + * Yes, this will flicker + */ + if (vblank_start <= vactive && vblank_end >= vtotal) { + uint32_t vsync = I915_READ(vsync_reg); + uint32_t vsync_start = (vsync & 0xffff) + 1; + + vblank_start = vsync_start; + I915_WRITE(vblank_reg, + (vblank_start - 1) | + ((vblank_end - 1) << 16)); + restore_vblank = true; + } + /* sample in the vertical border, selecting the larger one */ + if (vblank_start - vactive >= vtotal - vblank_end) + vsample = (vblank_start + vactive) >> 1; + else + vsample = (vtotal + vblank_end) >> 1; + + /* + * Wait for the border to be displayed + */ + while (I915_READ(pipe_dsl_reg) >= vactive) + ; + while ((dsl = I915_READ(pipe_dsl_reg)) <= vsample) + ; + /* + * Watch ST00 for an entire scanline + */ + detect = 0; + count = 0; + do { + count++; + /* Read the ST00 VGA status register */ + st00 = I915_READ8(VGA_MSR_WRITE); + if (st00 & (1 << 4)) + detect++; + } while ((I915_READ(pipe_dsl_reg) == dsl)); + + /* restore vblank if necessary */ + if (restore_vblank) + I915_WRITE(vblank_reg, vblank); + /* + * If more than 3/4 of the scanline detected a monitor, + * then it is assumed to be present. This works even on i830, + * where there isn't any way to force the border color across + * the screen + */ + status = detect * 4 > count * 3 ? + connector_status_connected : + connector_status_disconnected; + } + + /* Restore previous settings */ + I915_WRITE(bclrpat_reg, save_bclrpat); + + return status; +} + static enum drm_connector_status intel_crt_detect(struct drm_connector *connector) { struct drm_device *dev = connector->dev; + struct intel_output *intel_output = to_intel_output(connector); + struct drm_encoder *encoder = &intel_output->enc; + struct drm_crtc *crtc; + int dpms_mode; + enum drm_connector_status status; if (IS_I9XX(dev) && !IS_I915G(dev) && !IS_I915GM(dev)) { if (intel_crt_detect_hotplug(connector)) @@ -212,8 +345,20 @@ static enum drm_connector_status intel_crt_detect(struct drm_connector *connecto if (intel_crt_detect_ddc(connector)) return connector_status_connected; - /* TODO use load detect */ - return connector_status_unknown; + /* for pre-945g platforms use load detect */ + if (encoder->crtc && encoder->crtc->enabled) { + status = intel_crt_load_detect(encoder->crtc, intel_output); + } else { + crtc = intel_get_load_detect_pipe(intel_output, + NULL, &dpms_mode); + if (crtc) { + status = intel_crt_load_detect(crtc, intel_output); + intel_release_load_detect_pipe(intel_output, dpms_mode); + } else + status = connector_status_unknown; + } + + return status; } static void intel_crt_destroy(struct drm_connector *connector) From 68743082b560067e3e93eab8b2568f238e486865 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Tue, 26 May 2009 14:51:00 -0400 Subject: [PATCH 70/91] XPRTRDMA: fix client rpcrdma FRMR registration on mlx4 devices mlx4/connectX FRMR requires local write enable together with remote rdma write enable. This fixes NFS/RDMA operation over the ConnectX Infiniband HCA in the default memreg mode. Signed-off-by: Vu Pham Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3b21e0cc5e69..465aafc2007f 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1495,7 +1495,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; frmr_wr.wr.fast_reg.access_flags = (writing ? - IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_REMOTE_READ); frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); From d0367a508af9cf97beb202935bb9ad8883d30cd1 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 26 May 2009 14:51:00 -0400 Subject: [PATCH 71/91] nfs: fix build error in nfsroot with initconst fix build error with latest kbuild adjustments to initconst. The commit a447c0932445f92ce6f4c1bd020f62c5097a7842 ("vfs: Use const for kernel parser table") changed: static match_table_t __initdata tokens = { to static match_table_t __initconst tokens = { But the missing const causes popwerpc to fail with latest updates to __initconst like this: fs/nfs/nfsroot.c:400: error: __setup_str_nfs_root_setup causes a section type conflict fs/nfs/nfsroot.c:400: error: __setup_str_nfs_root_setup causes a section type conflict The bug is only present with kbuild-next. Following patch has been build tested. Signed-off-by: Sam Ravnborg Cc: Steven Whitehouse Cc: Stephen Rothwell Acked-by: Jan Beulich Signed-off-by: Trond Myklebust --- fs/nfs/nfsroot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index d9ef602fbc5a..e3ed5908820b 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -129,7 +129,7 @@ enum { Opt_err }; -static match_table_t __initconst tokens = { +static const match_table_t tokens __initconst = { {Opt_port, "port=%u"}, {Opt_rsize, "rsize=%u"}, {Opt_wsize, "wsize=%u"}, From 95baa25c7321eb8613246acbf61b97911cc748d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 May 2009 14:51:00 -0400 Subject: [PATCH 72/91] NFSv4: Fix the case where NFSv4 renewal fails If the asynchronous lease renewal fails (usually due to a soft timeout), then we _must_ schedule state recovery in order to ensure that we don't lose the lease unnecessarily or, if the lease is already lost, that we recover the locking state promptly... Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a4d242680299..4674f8092da8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2594,12 +2594,9 @@ static void nfs4_renew_done(struct rpc_task *task, void *data) unsigned long timestamp = (unsigned long)data; if (task->tk_status < 0) { - switch (task->tk_status) { - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_EXPIRED: - case -NFS4ERR_CB_PATH_DOWN: - nfs4_schedule_state_recovery(clp); - } + /* Unless we're shutting down, schedule state recovery! */ + if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) + nfs4_schedule_state_recovery(clp); return; } spin_lock(&clp->cl_lock); From ab2b7ebaad16226c9a5e85c5f384d19fa58a7459 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 26 May 2009 09:11:03 +0100 Subject: [PATCH 73/91] kmod: Release sub_info on cred allocation failure. call_usermodehelper_setup() forgot to kfree(sub_info) when prepare_usermodehelper_creds() failed. Signed-off-by: Tetsuo Handa Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- kernel/kmod.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/kmod.c b/kernel/kmod.c index b750675251e5..7e95bedb2bfc 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -370,8 +370,10 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, sub_info->argv = argv; sub_info->envp = envp; sub_info->cred = prepare_usermodehelper_creds(); - if (!sub_info->cred) + if (!sub_info->cred) { + kfree(sub_info); return NULL; + } out: return sub_info; From 564346224daaa8f7222d7a92cdbb7bafde59ae6e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 26 May 2009 20:54:41 +0930 Subject: [PATCH 74/91] lguest: fix on Intel when KVM loaded (unhandled trap 13) When KVM is loaded, and hence VT set up, the vmcall instruction in an lguest guest causes a #GP, not #UD. Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds --- drivers/lguest/x86/core.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 1a83910f674f..eaf722fe309a 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -358,6 +358,16 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) if (emulate_insn(cpu)) return; } + /* If KVM is active, the vmcall instruction triggers a + * General Protection Fault. Normally it triggers an + * invalid opcode fault (6): */ + case 6: + /* We need to check if ring == GUEST_PL and + * faulting instruction == vmcall. */ + if (is_hypercall(cpu)) { + rewrite_hypercall(cpu); + return; + } break; case 14: /* We've intercepted a Page Fault. */ /* The Guest accessed a virtual address that wasn't mapped. @@ -403,15 +413,6 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) * up the pointer now to indicate a hypercall is pending. */ cpu->hcall = (struct hcall_args *)cpu->regs; return; - case 6: - /* kvm hypercalls trigger an invalid opcode fault (6). - * We need to check if ring == GUEST_PL and - * faulting instruction == vmcall. */ - if (is_hypercall(cpu)) { - rewrite_hypercall(cpu); - return; - } - break; } /* We didn't handle the trap, so it needs to go to the Guest. */ From 2171787be2e71ff71159857bfeb21398b61eb615 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Tue, 26 May 2009 10:33:35 -0700 Subject: [PATCH 75/91] x86: avoid back to back on_each_cpu in cpa_flush_array Cleanup cpa_flush_array() to avoid back to back on_each_cpu() calls. [ Impact: optimizes fix 0af48f42df15b97080b450d24219dd95db7b929a ] Signed-off-by: Venkatesh Pallipadi Signed-off-by: H. Peter Anvin --- arch/x86/mm/pageattr.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 0f9052bcec4b..e17efed088c5 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -204,30 +204,19 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) } } -static void wbinvd_local(void *unused) -{ - wbinvd(); -} - static void cpa_flush_array(unsigned long *start, int numpages, int cache, int in_flags, struct page **pages) { unsigned int i, level; + unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ BUG_ON(irqs_disabled()); - on_each_cpu(__cpa_flush_range, NULL, 1); + on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1); - if (!cache) + if (!cache || do_wbinvd) return; - /* 4M threshold */ - if (numpages >= 1024) { - if (boot_cpu_data.x86 >= 4) - on_each_cpu(wbinvd_local, NULL, 1); - - return; - } /* * We only need to flush on one CPU, * clflush is a MESI-coherent instruction that From b1338d199dda6681d9af0297928af0a7eb9cba7b Mon Sep 17 00:00:00 2001 From: Herton Ronaldo Krzesinski Date: Tue, 26 May 2009 12:15:53 +0900 Subject: [PATCH 76/91] tomoyo: add missing call to cap_bprm_set_creds cap_bprm_set_creds() has to be called from security_bprm_set_creds(). TOMOYO forgot to call cap_bprm_set_creds() from tomoyo_bprm_set_creds() and suid executables were not being working. Make sure we call cap_bprm_set_creds() with TOMOYO, to set credentials properly inside tomoyo_bprm_set_creds(). Signed-off-by: Herton Ronaldo Krzesinski Acked-by: Tetsuo Handa Signed-off-by: James Morris --- security/tomoyo/tomoyo.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 5b481912752a..e42be5c4f055 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -27,6 +27,12 @@ static int tomoyo_cred_prepare(struct cred *new, const struct cred *old, static int tomoyo_bprm_set_creds(struct linux_binprm *bprm) { + int rc; + + rc = cap_bprm_set_creds(bprm); + if (rc) + return rc; + /* * Do only if this function is called for the first time of an execve * operation. From 84532a0fc3d5811dca8e3726fe4d372ea87bd7c6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 27 May 2009 13:33:14 +1000 Subject: [PATCH 77/91] Revert "powerpc: Rework dma-noncoherent to use generic vmalloc layer" This reverts commit 33f00dcedb0e22cdb156a23632814fc580fcfcf8. While it was a good idea to try to use the mm/vmalloc.c allocator instead of our own (in fact, ours is itself a dup on an old variant of the vmalloc one), unfortunately, the approach is terminally busted since dma_alloc_coherent() can be called at interrupt time or in atomic contexts and there's little chances we'll make the code in mm/vmalloc.c cope with\ that :-( Until we can get the generic code to forbid that idiocy and fix all drivers abusing it, we pretty much have no choice but revert to our custom virtual space allocator. There's also a problem with SMP safety since freeing such mapping would require an IPI which cannot be done at interrupt time. However, right now, I don't think we support any platform that is both SMP and has non-coherent DMA (don't laugh, I know such things do exist !) so we can sort that out later. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 25 +++ arch/powerpc/lib/dma-noncoherent.c | 299 +++++++++++++++++++++++------ 2 files changed, 269 insertions(+), 55 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a0d1146a0578..3bb43adce44d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -868,6 +868,31 @@ config TASK_SIZE default "0x80000000" if PPC_PREP || PPC_8xx default "0xc0000000" +config CONSISTENT_START_BOOL + bool "Set custom consistent memory pool address" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the base virtual address + of the consistent memory pool. This pool of virtual + memory is used to make consistent memory allocations. + +config CONSISTENT_START + hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL + default "0xfd000000" if (NOT_COHERENT_CACHE && 8xx) + default "0xff100000" if NOT_COHERENT_CACHE + +config CONSISTENT_SIZE_BOOL + bool "Set custom consistent memory pool size" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the size of the + consistent memory pool. This pool of virtual memory + is used to make consistent memory allocations. + +config CONSISTENT_SIZE + hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL + default "0x00200000" if NOT_COHERENT_CACHE + config PIN_TLB bool "Pinned Kernel TLBs (860 ONLY)" depends on ADVANCED_OPTIONS && 8xx diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c index 005a28d380af..b7dc4c19f582 100644 --- a/arch/powerpc/lib/dma-noncoherent.c +++ b/arch/powerpc/lib/dma-noncoherent.c @@ -29,10 +29,120 @@ #include #include #include -#include #include +/* + * This address range defaults to a value that is safe for all + * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It + * can be further configured for specific applications under + * the "Advanced Setup" menu. -Matt + */ +#define CONSISTENT_BASE (CONFIG_CONSISTENT_START) +#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) +#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) + +/* + * This is the page table (2MB) covering uncached, DMA consistent allocations + */ +static pte_t *consistent_pte; +static DEFINE_SPINLOCK(consistent_lock); + +/* + * VM region handling support. + * + * This should become something generic, handling VM region allocations for + * vmalloc and similar (ioremap, module space, etc). + * + * I envisage vmalloc()'s supporting vm_struct becoming: + * + * struct vm_struct { + * struct vm_region region; + * unsigned long flags; + * struct page **pages; + * unsigned int nr_pages; + * unsigned long phys_addr; + * }; + * + * get_vm_area() would then call vm_region_alloc with an appropriate + * struct vm_region head (eg): + * + * struct vm_region vmalloc_head = { + * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), + * .vm_start = VMALLOC_START, + * .vm_end = VMALLOC_END, + * }; + * + * However, vmalloc_head.vm_start is variable (typically, it is dependent on + * the amount of RAM found at boot time.) I would imagine that get_vm_area() + * would have to initialise this each time prior to calling vm_region_alloc(). + */ +struct ppc_vm_region { + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; +}; + +static struct ppc_vm_region consistent_head = { + .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), + .vm_start = CONSISTENT_BASE, + .vm_end = CONSISTENT_END, +}; + +static struct ppc_vm_region * +ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp) +{ + unsigned long addr = head->vm_start, end = head->vm_end - size; + unsigned long flags; + struct ppc_vm_region *c, *new; + + new = kmalloc(sizeof(struct ppc_vm_region), gfp); + if (!new) + goto out; + + spin_lock_irqsave(&consistent_lock, flags); + + list_for_each_entry(c, &head->vm_list, vm_list) { + if ((addr + size) < addr) + goto nospc; + if ((addr + size) <= c->vm_start) + goto found; + addr = c->vm_end; + if (addr > end) + goto nospc; + } + + found: + /* + * Insert this entry _before_ the one we found. + */ + list_add_tail(&new->vm_list, &c->vm_list); + new->vm_start = addr; + new->vm_end = addr + size; + + spin_unlock_irqrestore(&consistent_lock, flags); + return new; + + nospc: + spin_unlock_irqrestore(&consistent_lock, flags); + kfree(new); + out: + return NULL; +} + +static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr) +{ + struct ppc_vm_region *c; + + list_for_each_entry(c, &head->vm_list, vm_list) { + if (c->vm_start == addr) + goto out; + } + c = NULL; + out: + return c; +} + /* * Allocate DMA-coherent memory space and return both the kernel remapped * virtual and bus address for that space. @@ -41,21 +151,21 @@ void * __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) { struct page *page; + struct ppc_vm_region *c; unsigned long order; - int i; - unsigned int nr_pages = PAGE_ALIGN(size)>>PAGE_SHIFT; - unsigned int array_size = nr_pages * sizeof(struct page *); - struct page **pages; - struct page *end; u64 mask = 0x00ffffff, limit; /* ISA default */ - struct vm_struct *area; - BUG_ON(!mem_init_done); + if (!consistent_pte) { + printk(KERN_ERR "%s: not initialised\n", __func__); + dump_stack(); + return NULL; + } + size = PAGE_ALIGN(size); limit = (mask + 1) & ~mask; - if (limit && size >= limit) { - printk(KERN_WARNING "coherent allocation too big (requested " - "%#x mask %#Lx)\n", size, mask); + if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) { + printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n", + size, mask); return NULL; } @@ -68,8 +178,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) if (!page) goto no_page; - end = page + (1 << order); - /* * Invalidate any data that might be lurking in the * kernel direct-mapped region for device DMA. @@ -80,59 +188,48 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) flush_dcache_range(kaddr, kaddr + size); } - split_page(page, order); - /* - * Set the "dma handle" + * Allocate a virtual address in the consistent mapping region. */ - *handle = page_to_phys(page); + c = ppc_vm_region_alloc(&consistent_head, size, + gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); + if (c) { + unsigned long vaddr = c->vm_start; + pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); + struct page *end = page + (1 << order); - area = get_vm_area_caller(size, VM_IOREMAP, - __builtin_return_address(1)); - if (!area) - goto out_free_pages; + split_page(page, order); - if (array_size > PAGE_SIZE) { - pages = vmalloc(array_size); - area->flags |= VM_VPAGES; - } else { - pages = kmalloc(array_size, GFP_KERNEL); - } - if (!pages) - goto out_free_area; + /* + * Set the "dma handle" + */ + *handle = page_to_phys(page); - area->pages = pages; - area->nr_pages = nr_pages; + do { + BUG_ON(!pte_none(*pte)); - for (i = 0; i < nr_pages; i++) - pages[i] = page + i; + SetPageReserved(page); + set_pte_at(&init_mm, vaddr, + pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); + page++; + pte++; + vaddr += PAGE_SIZE; + } while (size -= PAGE_SIZE); - if (map_vm_area(area, pgprot_noncached(PAGE_KERNEL), &pages)) - goto out_unmap; + /* + * Free the otherwise unused pages. + */ + while (page < end) { + __free_page(page); + page++; + } - /* - * Free the otherwise unused pages. - */ - page += nr_pages; - while (page < end) { - __free_page(page); - page++; + return (void *)c->vm_start; } - return area->addr; -out_unmap: - vunmap(area->addr); - if (array_size > PAGE_SIZE) - vfree(pages); - else - kfree(pages); - goto out_free_pages; -out_free_area: - free_vm_area(area); -out_free_pages: if (page) __free_pages(page, order); -no_page: + no_page: return NULL; } EXPORT_SYMBOL(__dma_alloc_coherent); @@ -142,11 +239,103 @@ EXPORT_SYMBOL(__dma_alloc_coherent); */ void __dma_free_coherent(size_t size, void *vaddr) { - vfree(vaddr); + struct ppc_vm_region *c; + unsigned long flags, addr; + pte_t *ptep; + size = PAGE_ALIGN(size); + + spin_lock_irqsave(&consistent_lock, flags); + + c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr); + if (!c) + goto no_area; + + if ((c->vm_end - c->vm_start) != size) { + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; + } + + ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); + addr = c->vm_start; + do { + pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + unsigned long pfn; + + ptep++; + addr += PAGE_SIZE; + + if (!pte_none(pte) && pte_present(pte)) { + pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + ClearPageReserved(page); + + __free_page(page); + continue; + } + } + + printk(KERN_CRIT "%s: bad page in kernel page table\n", + __func__); + } while (size -= PAGE_SIZE); + + flush_tlb_kernel_range(c->vm_start, c->vm_end); + + list_del(&c->vm_list); + + spin_unlock_irqrestore(&consistent_lock, flags); + + kfree(c); + return; + + no_area: + spin_unlock_irqrestore(&consistent_lock, flags); + printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", + __func__, vaddr); + dump_stack(); } EXPORT_SYMBOL(__dma_free_coherent); +/* + * Initialise the consistent memory allocation. + */ +static int __init dma_alloc_init(void) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int ret = 0; + + do { + pgd = pgd_offset(&init_mm, CONSISTENT_BASE); + pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE); + pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE); + if (!pmd) { + printk(KERN_ERR "%s: no pmd tables\n", __func__); + ret = -ENOMEM; + break; + } + + pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); + if (!pte) { + printk(KERN_ERR "%s: no pte tables\n", __func__); + ret = -ENOMEM; + break; + } + + consistent_pte = pte; + } while (0); + + return ret; +} + +core_initcall(dma_alloc_init); + /* * make an area consistent. */ From 7a1450fdf4c69961f3926352fd8bc4ea19676756 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 26 May 2009 04:55:38 -0400 Subject: [PATCH 78/91] Blackfin: hook up preadv/pwritev syscalls Signed-off-by: Mike Frysinger --- arch/blackfin/include/asm/unistd.h | 4 +++- arch/blackfin/mach-common/entry.S | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h index 1e57b636e0bc..cf5066d3efd2 100644 --- a/arch/blackfin/include/asm/unistd.h +++ b/arch/blackfin/include/asm/unistd.h @@ -378,8 +378,10 @@ #define __NR_dup3 363 #define __NR_pipe2 364 #define __NR_inotify_init1 365 +#define __NR_preadv 366 +#define __NR_pwritev 367 -#define __NR_syscall 366 +#define __NR_syscall 368 #define NR_syscalls __NR_syscall /* Old optional stuff no one actually uses */ diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 21e65a339a22..a063a434f7e3 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1581,6 +1581,8 @@ ENTRY(_sys_call_table) .long _sys_dup3 .long _sys_pipe2 .long _sys_inotify_init1 /* 365 */ + .long _sys_preadv + .long _sys_pwritev .rept NR_syscalls-(.-_sys_call_table)/4 .long _sys_ni_syscall From 6c83429a1c32c914dfb81939cc2ddece97e48294 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Sun, 24 May 2009 02:13:15 -0400 Subject: [PATCH 79/91] MAINTAINERS: update Blackfin items With Bryan Wu having moved on to another job, push the slack onto some other ADI lackeys. Signed-off-by: Mike Frysinger --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 77cbfb1a696c..7dd34750264f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1132,16 +1132,16 @@ F: fs/bfs/ F: include/linux/bfs_fs.h BLACKFIN ARCHITECTURE -P: Bryan Wu -M: cooloney@kernel.org +P: Mike Frysinger +M: vapier@gentoo.org L: uclinux-dist-devel@blackfin.uclinux.org W: http://blackfin.uclinux.org S: Supported F: arch/blackfin/ BLACKFIN EMAC DRIVER -P: Bryan Wu -M: cooloney@kernel.org +P: Michael Hennerich +M: michael.hennerich@analog.com L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) W: http://blackfin.uclinux.org S: Supported From 49afa60948f859e71d68a74c1af6ccd7b5b94d82 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 18 May 2009 04:33:07 -0400 Subject: [PATCH 80/91] MAINTAINERS: drop (subscribers-only) markings on Blackfin lists All of the Blackfin lists are transparently moderated for non-subscribers. i.e. there are no annoying notices and people get whitelisted after first their posting. Signed-off-by: Mike Frysinger --- MAINTAINERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7dd34750264f..5ee166e27b9f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1142,7 +1142,7 @@ F: arch/blackfin/ BLACKFIN EMAC DRIVER P: Michael Hennerich M: michael.hennerich@analog.com -L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) +L: uclinux-dist-devel@blackfin.uclinux.org W: http://blackfin.uclinux.org S: Supported F: drivers/net/bfin_mac.* @@ -1150,7 +1150,7 @@ F: drivers/net/bfin_mac.* BLACKFIN RTC DRIVER P: Mike Frysinger M: vapier.adi@gmail.com -L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) +L: uclinux-dist-devel@blackfin.uclinux.org W: http://blackfin.uclinux.org S: Supported F: drivers/rtc/rtc-bfin.c @@ -1158,7 +1158,7 @@ F: drivers/rtc/rtc-bfin.c BLACKFIN SERIAL DRIVER P: Sonic Zhang M: sonic.zhang@analog.com -L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) +L: uclinux-dist-devel@blackfin.uclinux.org W: http://blackfin.uclinux.org S: Supported F: drivers/serial/bfin_5xx.c @@ -1166,7 +1166,7 @@ F: drivers/serial/bfin_5xx.c BLACKFIN WATCHDOG DRIVER P: Mike Frysinger M: vapier.adi@gmail.com -L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) +L: uclinux-dist-devel@blackfin.uclinux.org W: http://blackfin.uclinux.org S: Supported F: drivers/watchdog/bfin_wdt.c @@ -1174,7 +1174,7 @@ F: drivers/watchdog/bfin_wdt.c BLACKFIN I2C TWI DRIVER P: Sonic Zhang M: sonic.zhang@analog.com -L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) +L: uclinux-dist-devel@blackfin.uclinux.org W: http://blackfin.uclinux.org/ S: Supported F: drivers/i2c/busses/i2c-bfin-twi.c From 6b50520b2fd9bf521f9c947b5f6999bad273a51d Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 19 May 2009 10:03:22 -0400 Subject: [PATCH 81/91] Blackfin: ignore generated vmlinux.lds Signed-off-by: Mike Frysinger --- arch/blackfin/kernel/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 arch/blackfin/kernel/.gitignore diff --git a/arch/blackfin/kernel/.gitignore b/arch/blackfin/kernel/.gitignore new file mode 100644 index 000000000000..c5f676c3c224 --- /dev/null +++ b/arch/blackfin/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds From 2ec10ea91bf3688013b00638f29df4f8f6b5c18b Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 20 May 2009 19:45:39 -0400 Subject: [PATCH 82/91] Blackfin: drop unneeded asm/.gitignore We don't create a include/asm/mach/ symlink anymore, so we don't need the .gitignore for it. Signed-off-by: Mike Frysinger --- arch/blackfin/include/asm/.gitignore | 1 - 1 file changed, 1 deletion(-) delete mode 100644 arch/blackfin/include/asm/.gitignore diff --git a/arch/blackfin/include/asm/.gitignore b/arch/blackfin/include/asm/.gitignore deleted file mode 100644 index 7858564a4466..000000000000 --- a/arch/blackfin/include/asm/.gitignore +++ /dev/null @@ -1 +0,0 @@ -+mach From add8a5050a52f1bd1be6b97be86fdd1cfbea2d1d Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 26 May 2009 05:03:52 -0400 Subject: [PATCH 83/91] Blackfin: fix strncmp.o build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix some more fallout of the string changes: CC arch/blackfin/lib/strncmp.o In file included from include/linux/bitmap.h:9, from include/linux/nodemask.h:90, from include/linux/mmzone.h:17, from include/linux/gfp.h:5, from include/linux/kmod.h:23, from include/linux/module.h:14, from arch/blackfin/lib/strncmp.c:14: include/linux/string.h: In function ‘strstarts’: include/linux/string.h:132: error: implicit declaration of function ‘strncmp’ make[1]: *** [arch/blackfin/lib/strncmp.o] Error 1 Signed-off-by: Mike Frysinger CC: Rusty Russell --- arch/blackfin/lib/strncmp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/blackfin/lib/strncmp.c b/arch/blackfin/lib/strncmp.c index 2aaae78a68e0..46518b1d2983 100644 --- a/arch/blackfin/lib/strncmp.c +++ b/arch/blackfin/lib/strncmp.c @@ -8,9 +8,8 @@ #define strncmp __inline_strncmp #include -#undef strncmp - #include +#undef strncmp int strncmp(const char *cs, const char *ct, size_t count) { From b16e7766d6436835f473ba823ad04fbdfe5e9cbd Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 27 May 2009 13:36:10 +1000 Subject: [PATCH 84/91] powerpc: Move dma-noncoherent.c from arch/powerpc/lib to arch/powerpc/mm (pre-requisite to make the next patches more palatable) Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/lib/Makefile | 1 - arch/powerpc/mm/Makefile | 1 + arch/powerpc/{lib => mm}/dma-noncoherent.c | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename arch/powerpc/{lib => mm}/dma-noncoherent.c (100%) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 8db35278a4b4..29b742b90f1f 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -18,7 +18,6 @@ obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ memcpy_64.o usercopy_64.o mem_64.o string.o obj-$(CONFIG_XMON) += sstep.o obj-$(CONFIG_KPROBES) += sstep.o -obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o ifeq ($(CONFIG_PPC64),y) obj-$(CONFIG_SMP) += locks.o diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 17290bcedc5e..b746f4ca4209 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -26,3 +26,4 @@ obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o +obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c similarity index 100% rename from arch/powerpc/lib/dma-noncoherent.c rename to arch/powerpc/mm/dma-noncoherent.c From f637a49e507c88354ab32b5d914e06acfb7ee00d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 27 May 2009 13:44:50 +1000 Subject: [PATCH 85/91] powerpc: Minor cleanups of kernel virt address space definitions Make FIXADDR_TOP a compile time constant and cleanup a couple of definitions relative to the layout of the kernel address space on ppc32. We also print out that layout at boot time for debugging purposes. This is a pre-requisite for properly fixing non-coherent DMA allocactions. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/fixmap.h | 4 ++-- arch/powerpc/include/asm/pgtable-ppc32.h | 22 ++++++++++++++++++++-- arch/powerpc/mm/init_32.c | 8 ++------ arch/powerpc/mm/mem.c | 13 +++++++++++++ arch/powerpc/mm/pgtable_32.c | 2 -- 5 files changed, 37 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index d60fd18f428c..f1f4e23a84e9 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -14,8 +14,6 @@ #ifndef _ASM_FIXMAP_H #define _ASM_FIXMAP_H -extern unsigned long FIXADDR_TOP; - #ifndef __ASSEMBLY__ #include #include @@ -24,6 +22,8 @@ extern unsigned long FIXADDR_TOP; #include #endif +#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) + /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index ba45c997830f..28fe9d4bae35 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -10,7 +10,7 @@ extern unsigned long va_to_phys(unsigned long address); extern pte_t *va_to_pte(unsigned long address); -extern unsigned long ioremap_bot, ioremap_base; +extern unsigned long ioremap_bot; #ifdef CONFIG_44x extern int icache_44x_need_flush; @@ -55,9 +55,27 @@ extern int icache_44x_need_flush; #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +/* + * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary + * value (for now) on others, from where we can start layout kernel + * virtual space that goes below PKMAP and FIXMAP + */ +#ifdef CONFIG_HIGHMEM +#define KVIRT_TOP PKMAP_BASE +#else +#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */ +#endif + +/* + * ioremap_bot starts at that address. Early ioremaps move down from there, + * until mem_init() at which point this becomes the top of the vmalloc + * and ioremap space + */ +#define IOREMAP_TOP KVIRT_TOP + /* * Just any arbitrary offset to the start of the vmalloc VM area: the - * current 64MB value just means that there will be a 64MB "hole" after the + * current 16MB value just means that there will be a 64MB "hole" after the * physical memory until the kernel virtual memory starts. That means that * any out-of-bounds memory accesses will hopefully be caught. * The vmalloc() routines leaves a hole of 4kB between each vmalloced diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 666a5e8a5be1..3de6a0d93824 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -168,12 +168,8 @@ void __init MMU_init(void) ppc_md.progress("MMU:mapin", 0x301); mapin_ram(); -#ifdef CONFIG_HIGHMEM - ioremap_base = PKMAP_BASE; -#else - ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ -#endif /* CONFIG_HIGHMEM */ - ioremap_bot = ioremap_base; + /* Initialize early top-down ioremap allocator */ + ioremap_bot = IOREMAP_TOP; /* Map in I/O resources */ if (ppc_md.progress) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d0602a76bf7f..d3a4e67561fa 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -380,6 +380,19 @@ void __init mem_init(void) bsssize >> 10, initsize >> 10); +#ifdef CONFIG_PPC32 + pr_info("Kernel virtual memory layout:\n"); + pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); +#ifdef CONFIG_HIGHMEM + pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n", + PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP)); +#endif /* CONFIG_HIGHMEM */ + pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", + ioremap_bot, IOREMAP_TOP); + pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", + VMALLOC_START, VMALLOC_END); +#endif /* CONFIG_PPC32 */ + mem_init_done = 1; } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 430d0908fa50..5422169626ba 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -399,8 +399,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable) #endif /* CONFIG_DEBUG_PAGEALLOC */ static int fixmaps; -unsigned long FIXADDR_TOP = (-PAGE_SIZE); -EXPORT_SYMBOL(FIXADDR_TOP); void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { From 8b31e49d1d75729c1da9009664ba52abd1adc628 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 27 May 2009 13:50:33 +1000 Subject: [PATCH 86/91] powerpc: Fix up dma_alloc_coherent() on platforms without cache coherency. The implementation we just revived has issues, such as using a Kconfig-defined virtual address area in kernel space that nothing actually carves out (and thus will overlap whatever is there), or having some dependencies on being self contained in a single PTE page which adds unnecessary constraints on the kernel virtual address space. This fixes it by using more classic PTE accessors and automatically locating the area for consistent memory, carving an appropriate hole in the kernel virtual address space, leaving only the size of that area as a Kconfig option. It also brings some dma-mask related fixes from the ARM implementation which was almost identical initially but grew its own fixes. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 13 --- arch/powerpc/include/asm/dma-mapping.h | 6 +- arch/powerpc/include/asm/pgtable-ppc32.h | 4 + arch/powerpc/kernel/dma.c | 2 +- arch/powerpc/mm/dma-noncoherent.c | 108 +++++++++-------------- arch/powerpc/mm/mem.c | 4 + 6 files changed, 54 insertions(+), 83 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3bb43adce44d..cdc9a6ff4be8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -868,19 +868,6 @@ config TASK_SIZE default "0x80000000" if PPC_PREP || PPC_8xx default "0xc0000000" -config CONSISTENT_START_BOOL - bool "Set custom consistent memory pool address" - depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE - help - This option allows you to set the base virtual address - of the consistent memory pool. This pool of virtual - memory is used to make consistent memory allocations. - -config CONSISTENT_START - hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL - default "0xfd000000" if (NOT_COHERENT_CACHE && 8xx) - default "0xff100000" if NOT_COHERENT_CACHE - config CONSISTENT_SIZE_BOOL bool "Set custom consistent memory pool size" depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index c69f2b5f0cc4..cb448d68452c 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -26,7 +26,9 @@ * allocate the space "normally" and use the cache management functions * to ensure it is consistent. */ -extern void *__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp); +struct device; +extern void *__dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp); extern void __dma_free_coherent(size_t size, void *vaddr); extern void __dma_sync(void *vaddr, size_t size, int direction); extern void __dma_sync_page(struct page *page, unsigned long offset, @@ -37,7 +39,7 @@ extern void __dma_sync_page(struct page *page, unsigned long offset, * Cache coherent cores. */ -#define __dma_alloc_coherent(gfp, size, handle) NULL +#define __dma_alloc_coherent(dev, gfp, size, handle) NULL #define __dma_free_coherent(size, addr) ((void)0) #define __dma_sync(addr, size, rw) ((void)0) #define __dma_sync_page(pg, off, sz, rw) ((void)0) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 28fe9d4bae35..c9ff9d75990e 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -71,7 +71,11 @@ extern int icache_44x_need_flush; * until mem_init() at which point this becomes the top of the vmalloc * and ioremap space */ +#ifdef CONFIG_NOT_COHERENT_CACHE +#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK) +#else #define IOREMAP_TOP KVIRT_TOP +#endif /* * Just any arbitrary offset to the start of the vmalloc VM area: the diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 53c7788cba78..6b02793dc75b 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -32,7 +32,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, { void *ret; #ifdef CONFIG_NOT_COHERENT_CACHE - ret = __dma_alloc_coherent(size, dma_handle, flag); + ret = __dma_alloc_coherent(dev, size, dma_handle, flag); if (ret == NULL) return NULL; *dma_handle += get_dma_direct_offset(dev); diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index b7dc4c19f582..36692f5c9a76 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -32,20 +32,21 @@ #include +#include "mmu_decl.h" + /* * This address range defaults to a value that is safe for all * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It * can be further configured for specific applications under * the "Advanced Setup" menu. -Matt */ -#define CONSISTENT_BASE (CONFIG_CONSISTENT_START) -#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) +#define CONSISTENT_BASE (IOREMAP_TOP) +#define CONSISTENT_END (CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE) #define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) /* * This is the page table (2MB) covering uncached, DMA consistent allocations */ -static pte_t *consistent_pte; static DEFINE_SPINLOCK(consistent_lock); /* @@ -148,22 +149,38 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi * virtual and bus address for that space. */ void * -__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) +__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) { struct page *page; struct ppc_vm_region *c; unsigned long order; - u64 mask = 0x00ffffff, limit; /* ISA default */ + u64 mask = ISA_DMA_THRESHOLD, limit; - if (!consistent_pte) { - printk(KERN_ERR "%s: not initialised\n", __func__); - dump_stack(); - return NULL; + if (dev) { + mask = dev->coherent_dma_mask; + + /* + * Sanity check the DMA mask - it must be non-zero, and + * must be able to be satisfied by a DMA allocation. + */ + if (mask == 0) { + dev_warn(dev, "coherent DMA mask is unset\n"); + goto no_page; + } + + if ((~mask) & ISA_DMA_THRESHOLD) { + dev_warn(dev, "coherent DMA mask %#llx is smaller " + "than system GFP_DMA mask %#llx\n", + mask, (unsigned long long)ISA_DMA_THRESHOLD); + goto no_page; + } } + size = PAGE_ALIGN(size); limit = (mask + 1) & ~mask; - if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) { + if ((limit && size >= limit) || + size >= (CONSISTENT_END - CONSISTENT_BASE)) { printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n", size, mask); return NULL; @@ -171,6 +188,7 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) order = get_order(size); + /* Might be useful if we ever have a real legacy DMA zone... */ if (mask != 0xffffffff) gfp |= GFP_DMA; @@ -195,7 +213,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); if (c) { unsigned long vaddr = c->vm_start; - pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); struct page *end = page + (1 << order); split_page(page, order); @@ -206,13 +223,10 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) *handle = page_to_phys(page); do { - BUG_ON(!pte_none(*pte)); - SetPageReserved(page); - set_pte_at(&init_mm, vaddr, - pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); + map_page(vaddr, page_to_phys(page), + pgprot_noncached(PAGE_KERNEL)); page++; - pte++; vaddr += PAGE_SIZE; } while (size -= PAGE_SIZE); @@ -241,8 +255,7 @@ void __dma_free_coherent(size_t size, void *vaddr) { struct ppc_vm_region *c; unsigned long flags, addr; - pte_t *ptep; - + size = PAGE_ALIGN(size); spin_lock_irqsave(&consistent_lock, flags); @@ -258,29 +271,26 @@ void __dma_free_coherent(size_t size, void *vaddr) size = c->vm_end - c->vm_start; } - ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); addr = c->vm_start; do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + pte_t *ptep; unsigned long pfn; - ptep++; - addr += PAGE_SIZE; - - if (!pte_none(pte) && pte_present(pte)) { - pfn = pte_pfn(pte); - + ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr), + addr), + addr), + addr); + if (!pte_none(*ptep) && pte_present(*ptep)) { + pfn = pte_pfn(*ptep); + pte_clear(&init_mm, addr, ptep); if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); - ClearPageReserved(page); + ClearPageReserved(page); __free_page(page); - continue; } } - - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); + addr += PAGE_SIZE; } while (size -= PAGE_SIZE); flush_tlb_kernel_range(c->vm_start, c->vm_end); @@ -300,42 +310,6 @@ void __dma_free_coherent(size_t size, void *vaddr) } EXPORT_SYMBOL(__dma_free_coherent); -/* - * Initialise the consistent memory allocation. - */ -static int __init dma_alloc_init(void) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int ret = 0; - - do { - pgd = pgd_offset(&init_mm, CONSISTENT_BASE); - pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE); - pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE); - if (!pmd) { - printk(KERN_ERR "%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - - pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); - if (!pte) { - printk(KERN_ERR "%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } - - consistent_pte = pte; - } while (0); - - return ret; -} - -core_initcall(dma_alloc_init); - /* * make an area consistent. */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d3a4e67561fa..579382c163a9 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -387,6 +387,10 @@ void __init mem_init(void) pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n", PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP)); #endif /* CONFIG_HIGHMEM */ +#ifdef CONFIG_NOT_COHERENT_CACHE + pr_info(" * 0x%08lx..0x%08lx : consistent mem\n", + IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE); +#endif /* CONFIG_NOT_COHERENT_CACHE */ pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", ioremap_bot, IOREMAP_TOP); pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", From ed37d83e6aa218192fb28bb6b82498d2a8c74070 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 27 May 2009 21:39:05 +1000 Subject: [PATCH 87/91] md: raid5: change incorrect usage of 'min' macro to 'min_t' A recent patch to raid5.c use min on an int and a sector_t. This isn't allowed. So change it to min_t(sector_t,x,y). Signed-off-by: NeilBrown --- drivers/md/raid5.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3c3626d2a1f9..5d400aef8d9b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3811,13 +3811,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped safepos = conf->reshape_safe; sector_div(safepos, data_disks); if (mddev->delta_disks < 0) { - writepos -= min(reshape_sectors, writepos); + writepos -= min_t(sector_t, reshape_sectors, writepos); readpos += reshape_sectors; safepos += reshape_sectors; } else { writepos += reshape_sectors; - readpos -= min(reshape_sectors, readpos); - safepos -= min(reshape_sectors, safepos); + readpos -= min_t(sector_t, reshape_sectors, readpos); + safepos -= min_t(sector_t, reshape_sectors, safepos); } /* 'writepos' is the most advanced device address we might write. From 348ca1029e8bae6e0c49097ad25439b17c5326f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 May 2009 15:46:50 +0100 Subject: [PATCH 88/91] FS-Cache: Fixup renamed filenames in comments in internal.h Fix up renamed filenames in comments in fs/fscache/internal.h. Originally, the files were all called fsc-xxx.c, but they got renamed to just xxx.c. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/fscache/internal.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index e0cbd16f6dc9..1c341304621f 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -28,7 +28,7 @@ #define FSCACHE_MAX_THREADS 32 /* - * fsc-cache.c + * cache.c */ extern struct list_head fscache_cache_list; extern struct rw_semaphore fscache_addremove_sem; @@ -37,7 +37,7 @@ extern struct fscache_cache *fscache_select_cache_for_object( struct fscache_cookie *); /* - * fsc-cookie.c + * cookie.c */ extern struct kmem_cache *fscache_cookie_jar; @@ -45,13 +45,13 @@ extern void fscache_cookie_init_once(void *); extern void __fscache_cookie_put(struct fscache_cookie *); /* - * fsc-fsdef.c + * fsdef.c */ extern struct fscache_cookie fscache_fsdef_index; extern struct fscache_cookie_def fscache_fsdef_netfs_def; /* - * fsc-histogram.c + * histogram.c */ #ifdef CONFIG_FSCACHE_HISTOGRAM extern atomic_t fscache_obj_instantiate_histogram[HZ]; @@ -75,7 +75,7 @@ extern const struct file_operations fscache_histogram_fops; #endif /* - * fsc-main.c + * main.c */ extern unsigned fscache_defer_lookup; extern unsigned fscache_defer_create; @@ -86,14 +86,14 @@ extern int fscache_wait_bit(void *); extern int fscache_wait_bit_interruptible(void *); /* - * fsc-object.c + * object.c */ extern void fscache_withdrawing_object(struct fscache_cache *, struct fscache_object *); extern void fscache_enqueue_object(struct fscache_object *); /* - * fsc-operation.c + * operation.c */ extern int fscache_submit_exclusive_op(struct fscache_object *, struct fscache_operation *); @@ -104,7 +104,7 @@ extern void fscache_start_operations(struct fscache_object *); extern void fscache_operation_gc(struct work_struct *); /* - * fsc-proc.c + * proc.c */ #ifdef CONFIG_PROC_FS extern int __init fscache_proc_init(void); @@ -115,7 +115,7 @@ extern void fscache_proc_cleanup(void); #endif /* - * fsc-stats.c + * stats.c */ #ifdef CONFIG_FSCACHE_STATS extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS]; From 911e690e70540f009125bacd16c017eb1a7b1916 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 May 2009 15:46:55 +0100 Subject: [PATCH 89/91] CacheFiles: Fixup renamed filenames in comments in internal.h Fix up renamed filenames in comments in fs/cachefiles/internal.h. Originally, the files were all called cf-xxx.c, but they got renamed to just xxx.c. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/cachefiles/internal.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 19218e1463d6..f7c255f9c624 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -122,13 +122,13 @@ static inline void cachefiles_state_changed(struct cachefiles_cache *cache) } /* - * cf-bind.c + * bind.c */ extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args); extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); /* - * cf-daemon.c + * daemon.c */ extern const struct file_operations cachefiles_daemon_fops; @@ -136,17 +136,17 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache, unsigned fnr, unsigned bnr); /* - * cf-interface.c + * interface.c */ extern const struct fscache_cache_ops cachefiles_cache_ops; /* - * cf-key.c + * key.c */ extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); /* - * cf-namei.c + * namei.c */ extern int cachefiles_delete_object(struct cachefiles_cache *cache, struct cachefiles_object *object); @@ -165,7 +165,7 @@ extern int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, char *filename); /* - * cf-proc.c + * proc.c */ #ifdef CONFIG_CACHEFILES_HISTOGRAM extern atomic_t cachefiles_lookup_histogram[HZ]; @@ -190,7 +190,7 @@ void cachefiles_hist(atomic_t histogram[], unsigned long start_jif) #endif /* - * cf-rdwr.c + * rdwr.c */ extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *, struct page *, gfp_t); @@ -205,7 +205,7 @@ extern int cachefiles_write_page(struct fscache_storage *, struct page *); extern void cachefiles_uncache_page(struct fscache_object *, struct page *); /* - * cf-security.c + * security.c */ extern int cachefiles_get_security_ID(struct cachefiles_cache *cache); extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache, @@ -225,7 +225,7 @@ static inline void cachefiles_end_secure(struct cachefiles_cache *cache, } /* - * cf-xattr.c + * xattr.c */ extern int cachefiles_check_object_type(struct cachefiles_object *object); extern int cachefiles_set_object_xattr(struct cachefiles_object *object, From f49afbb572d5e08ae12f1a979dc2e41745040339 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Thu, 28 May 2009 16:41:36 +0300 Subject: [PATCH 90/91] MAINTAINER: Add F: entries for Gemini and FA526 Signed-off-by: Paulius Zaleckas --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5ee166e27b9f..42c53ab6b9fa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -624,6 +624,7 @@ M: paulius.zaleckas@teltonika.lt L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) T: git git://gitorious.org/linux-gemini/mainline.git S: Maintained +F: arch/arm/mach-gemini/ ARM/EBSA110 MACHINE SUPPORT P: Russell King @@ -650,6 +651,7 @@ P: Paulius Zaleckas M: paulius.zaleckas@teltonika.lt L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) S: Maintained +F: arch/arm/mm/*-fa* ARM/FOOTBRIDGE ARCHITECTURE P: Russell King From 67a433ce278b98f47272726a22537fab7fd99de9 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Thu, 28 May 2009 16:42:25 +0300 Subject: [PATCH 91/91] Gemini: Fix SRAM/ROM location after memory swap Signed-off-by: Paulius Zaleckas --- arch/arm/mach-gemini/include/mach/hardware.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/mach-gemini/include/mach/hardware.h b/arch/arm/mach-gemini/include/mach/hardware.h index de6752674c05..213a4fcfeb1c 100644 --- a/arch/arm/mach-gemini/include/mach/hardware.h +++ b/arch/arm/mach-gemini/include/mach/hardware.h @@ -15,10 +15,9 @@ /* * Memory Map definitions */ -/* FIXME: Does it really swap SRAM like this? */ #ifdef CONFIG_GEMINI_MEM_SWAP # define GEMINI_DRAM_BASE 0x00000000 -# define GEMINI_SRAM_BASE 0x20000000 +# define GEMINI_SRAM_BASE 0x70000000 #else # define GEMINI_SRAM_BASE 0x00000000 # define GEMINI_DRAM_BASE 0x10000000