/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

#include "pvfs2-kernel.h"
#include "pvfs2-pnfs.h"

#include "nfsd/nfsd4_pnfs.h"
#include "sunrpc/xdr.h"
#include "sunrpc/svc.h"
#include "nfsd/nfs4layoutxdr.h"
#include "nfsd/nfsfh.h"
#include "nfs4_pnfs.h"
#include "bmi-byteswap.h"

/* Debug switch defined elsewhere; consulted by the pvfs2_print macro. */
extern int debug; /* used in pvfs2_print */

/* used to protect the layout information for an inode.
 * In this file it is taken by pvfs2_pvfs2layout_getattr() around the
 * "is a layout already cached?" check and the getattr call that fills
 * the cache.
 */
spinlock_t pvfs2_layout_lock = SPIN_LOCK_UNLOCKED;

/****** Common Functions ******/
/* pnfs_decode_int32_t(pptr, x)
 *   pptr: address of a byte cursor into an encoded buffer
 *   x:    address of the integer that receives the decoded value
 * Reads the 32-bit value at the cursor, converts it with bmitoh32(),
 * stores it in *x and advances the cursor by 4 bytes.
 */
#define pnfs_decode_int32_t(pptr,x) do {      \
        *(x) = bmitoh32(*(int32_t*) *(pptr)); \
        *(pptr) += 4;			      \
    } while (0)

/* pnfs_decode_string(pptr, pbuf)
 *   pptr: address of a byte cursor into an encoded buffer
 *   pbuf: address of a pointer that receives the start of the string
 * The string is stored as a 32-bit length followed by its bytes.  *pbuf
 * is pointed at the bytes in place (nothing is copied) and the cursor is
 * advanced by roundup8(4 + length + 1).
 * The scratch variable has a macro-private name so that it cannot
 * capture a caller expression that happens to mention "len", and every
 * macro argument is parenthesized.
 */
#define pnfs_decode_string(pptr,pbuf) do {				\
	u_int32_t pnfs_dec_len_ = bmitoh32(*(u_int32_t *) *(pptr));	\
	*(pbuf) = *(pptr) + 4;						\
	*(pptr) += roundup8(4 + pnfs_dec_len_ + 1);			\
    } while (0)

/* Report the pNFS layout type to nfsd.
 * The configured layouttype is returned unchanged, except that the
 * special value 5 (file layout whose data servers are listed through
 * the /proc fs) is reported as LAYOUT_NFSV4_FILES.
 */
static int
pvfs2_layout_type(void)
{
    if (layouttype != 5)
	return layouttype;

    return LAYOUT_NFSV4_FILES;
}

/* Number of devices produced by the most recent nfsmanual_getdevicelist()
 * call; nfsmanual_layout_get() passes it to nfs_build_layout() as the
 * device count.
 */
static unsigned int nfsmanual_num_devices;

/****** PVFS2 Layout Functions ******/

/* Set pvfs2 layout information for return to nfsd.
 * DH-TODO: This is copying from the pvfs2_inode_t struct
 * (which exists in a cache), to NFSD alloc'd memory.  It
 * is probably better that NFSD manages its own memory, so
 * even though there is copying from the downcall struct to
 * the pvfs2_inode_t and now to nfsd, this may make sense.
 * Better to optimize the downcall -> pvfs2_inode_t copy.
 */
static int
set_pvfs2_layout(struct nfsd4_pnfs_layoutget* req, void* layout, int layout_size)
{
    struct pvfs2_layout* lay_t;
    pvfs2_print("%s: Begin copying %d bytes\n",__FUNCTION__,layout_size);
    if (req->lg_layout)
    {
	pvfs2_print("%s: Existing layout, freeing existing memory\n",__FUNCTION__);
	kfree(req->lg_layout);
    }

    if (layout_size > req->lg_mxcnt)
    {
	printk("%s: Layout blob (%d) is larger than buffer (%d)\n",
	       __FUNCTION__,
	       layout_size,
	       req->lg_mxcnt);
	return -EIO;
    }

    lay_t = (struct pvfs2_layout*)kmalloc(sizeof(struct pvfs2_layout), GFP_KERNEL);
    lay_t->layout = layout;
    lay_t->length = layout_size;

    /* set return layout for nfsd */
    req->lg_layout = (void*)lay_t;

    pvfs2_print("%s: XDR PVFS2 LAYOUT\n", __FUNCTION__);
    pvfs2_print("\tblob size:%d\n", ((int*)layout)[0]);
    pvfs2_print("\tfsid:%d\n", ((int*)layout)[1]);
    pvfs2_print("\t# dfiles:%d\n", ((int*)layout)[2]);
    layout += 12;
    pvfs2_print("\tdfile 0:%llu\n", ((u64*)layout)[0]);

    return 0;
}

/* Retrieves pvfs2 pNFS layout from mds */
static int
pvfs2_pvfs2layout_getattr(struct inode * inode, int attributes)
{
    pvfs2_inode_t* pvfs2_inode = PVFS2_I(inode);
    int ret;

    pvfs2_print("%s: Start\n", __FUNCTION__);
    spin_lock(&pvfs2_layout_lock);

    /* Check if layout has already been retrieve for this inode */
    if (pvfs2_inode->layout_size <= 0)
    {
	/* perform upcall to retrieve layout */
	pvfs2_print("%s: Retrieving pNFS layout\n", __FUNCTION__);
        ret = pvfs2_inode_getattr(inode, attributes);
	if (ret || pvfs2_inode->layout_size <= 0)
	{
	    printk("%s: Error!  Could not retrieve layout (%d)\n",__FUNCTION__,ret);
	    ret = -ENOSYS;
	}
    } else {
	pvfs2_print("%s: Using cached pNFS layout\n", __FUNCTION__);
	ret = 0;
    }

    spin_unlock(&pvfs2_layout_lock);
    return ret;
}

/* Retrieves pvfs2 data layout information about the specified file.
 * return- positive 0
 * negative -ENOSYS or pvfs2_inode_getattr error
 */
static int
pvfs2_layout_get(struct inode * inode, void* buf)
{
    int ret;
    struct nfsd4_pnfs_layoutget *layout_request = (struct nfsd4_pnfs_layoutget*)buf;
    pvfs2_inode_t* pvfs2_inode = PVFS2_I(inode);

    pvfs2_print("%s: off:%Lu ex:%Lu macc:%d iomode:%d\n", __FUNCTION__,
		layout_request->lg_offset,
		layout_request->lg_length,
		layout_request->lg_mxcnt,
		layout_request->lg_iomode);

    if ((ret = pvfs2_pvfs2layout_getattr(inode, PVFS_ATTR_META_ALL)) < 0)
	return -ENOSYS;

    ret = set_pvfs2_layout(layout_request,
                           pvfs2_inode->layout,
                           pvfs2_inode->layout_size);
    if (ret)
	pvfs2_error("%s: Error!  Could not copy attributes (%d)\n",__FUNCTION__,ret);

    return ret;
}

/* layout_free export operation for the pvfs2 layout type.
 * Releases the descriptor that set_pvfs2_layout() stored in the
 * request; only the descriptor itself is freed, not the blob it
 * points to.  A NULL layout is ignored.
 */
static void
pvfs2_layout_free(void *layout)
{
    if (layout)
    {
	pvfs2_print("pvfs2: Freeing server layout struct\n");
	kfree(layout);
    }
}

/* layout_encode export operation for the pvfs2 layout type.
 *
 * p:      XDR output cursor (used implicitly by WRITE32/WRITEMEM)
 * end:    end of the output buffer
 * layout: struct pvfs2_layout produced by set_pvfs2_layout()
 *
 * Writes the blob length followed by the blob itself.
 * Returns length + 4, or 0 when there is no layout to encode.
 *
 * NOTE(review): 'end' is never consulted, so nothing here stops the
 * write from running past the buffer -- confirm the caller guarantees
 * enough room.
 */
static int
pvfs2_layout_encode(u32 *p, u32 *end, void *layout)
{
    struct pvfs2_layout *blob = (struct pvfs2_layout*)layout;

    pvfs2_print("%s: Encoding pvfs2 layout\n", __FUNCTION__);

    if (blob == NULL)
    {
	printk("%s: ERROR! No layout to encode.\n", __FUNCTION__);
	return 0;
    }

    pvfs2_print("%s: Layout length %d\n", __FUNCTION__, blob->length);

    /* Length word first, then the opaque layout bytes */
    WRITE32(blob->length);
    WRITEMEM(blob->layout, blob->length);

    return blob->length + 4;
}

/****** NFSv4 File Layout Functions ******/
/* Encodes a nfs file pNFS layout.
 * TODO: For now, always return the devices in order,e.g., 0,1,...
 * At some point we need to re-enable randomizing the starting
 * data server in pvfs2 (io_randomized) and then call
 * PVFS_mgmt_get_dfile_array to get the in order list
 * of servers for this data file (see descend and verify_datafiles
 * in src/apps/admin/pvfs2-fs-dump.c)
*/
static int
nfs_build_layout(struct nfsd4_pnfs_layoutget* req, int num_devices)
{
    struct nfsd4_pnfs_filelayout* nfslayout;
    int i;
    static char buf[80];
    struct knfsd_fh *fh = (struct knfsd_fh*)req->lg_fh;

    pvfs2_print("%s: Start\n",__FUNCTION__);

    /* Free existing layout if it exists */
    if (req->lg_layout)
    {
	pvfs2_print("%s: Existing layout, freeing existing memory\n",__FUNCTION__);
	kfree(req->lg_layout);
    }

    nfslayout = (struct nfsd4_pnfs_filelayout*)kmalloc(sizeof(struct nfsd4_pnfs_filelayout),
						   GFP_KERNEL);

    /* Set nfs layout information */
    nfslayout->lg_commit_through_mds = 1;
    nfslayout->lg_stripe_type = 1; /* sparse */
    nfslayout->lg_file_size = 0ULL;

    /* the stripe size is in the attributes of the pvfs2 layout,
     * but for now, just hardcode it to the value of the proc variable
     */
    nfslayout->lg_stripe_unit = layout_stripesize;

    /* # dataservers == # dfiles
     * Note: This is pre-xdr'd by pvfs2 user land code
     */
    nfslayout->lg_llistlen = num_devices;
    pvfs2_print("# data servers:%d\n", nfslayout->lg_llistlen);
    if (nfslayout->lg_llistlen <= 0)
    {
	pvfs2_error("%s: No data servers!\n",__FUNCTION__);
	kfree(nfslayout);
	return -ENOSYS;
    }

    nfslayout->lg_llist = (struct nfsd4_pnfs_layoutlist*)kmalloc(
	nfslayout->lg_llistlen * sizeof(struct nfsd4_pnfs_layoutlist), GFP_KERNEL);
    if (!nfslayout->lg_llist)
    {
	pvfs2_error("%s: Could not allocate nfs device list!\n",__FUNCTION__);
	kfree(nfslayout);
	return -ENOMEM;
    }

    /* set data server and fh info */
    for (i = 0; i < nfslayout->lg_llistlen; i++) {
	nfslayout->lg_llist[i].dev_id = i;
	nfslayout->lg_llist[i].dev_index = i;
	nfslayout->lg_llist[i].fhp = fh;

        /* To edit fh, edit req->lg_fh in place as follows:
	 int SetFH(int *fhP, int sid)
	 {
	 struct knfsd_fh *fh = (struct knfsd_fh *)fhP;

	 if (fh->fh_size > 8) {
	 fh->fh_size += 4; // fh_size + 4 for sid
	 fh->fh_fsid_type += max_fsid_type;
	 fhP[(fh->fh_size >> 2)] = sid;
	 fh->fh_fileid_type = 7;

	 return 0;
	 }
	 return ENOENT;
	 }

	*/
    }

    pvfs2_print("%s: Printing fh\n", __FUNCTION__);
    sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x",
	    fh->fh_size,
	    fh->fh_base.fh_pad[0],
	    fh->fh_base.fh_pad[1],
	    fh->fh_base.fh_pad[2],
	    fh->fh_base.fh_pad[3],
	    fh->fh_base.fh_pad[4],
	    fh->fh_base.fh_pad[5]);

    pvfs2_print("%s:%s\n", __FUNCTION__, buf);

    /* Set layout to be encoded later */
    req->lg_layout = (void*)nfslayout;
    return 0;
}

/* Fetches the pNFS nfsv4 device list with a PVFS2_VFS_OP_GETDEVLIST
 * upcall and caches it in the superblock info.
 *
 * pvfs2_sb:    receives the blob in pnfs_devlist / pnfs_devlist_size
 * pvfs2_inode: supplies the object reference sent with the upcall
 *
 * If pnfs_devlist_size is already positive the cached list is reused
 * and no upcall is made.
 *
 * Returns 0 on success, -ENOMEM if no operation could be allocated,
 * -ENOSYS if the upcall fails or reports a non-positive list size.
 *
 * TODO: need to make this thread aware
 * NOTE(review): the size reported by the downcall is used as the
 * memcpy length without being compared against the capacity of
 * pvfs2_sb->pnfs_devlist -- confirm the destination is large enough.
 */
static int
nfs_getdevlist_upcall(pvfs2_sb_info_t* pvfs2_sb, pvfs2_inode_t* pvfs2_inode)
{
    pvfs2_kernel_op_t *op;
    int status;

    pvfs2_print("%s: Start\n", __FUNCTION__);

    if (pvfs2_sb->pnfs_devlist_size > 0)
    {
	pvfs2_print("%s: Using cached pNFS nfsv4 device list\n", __FUNCTION__);
	return 0;
    }

    /* perform upcall to retrieve layout */
    pvfs2_print("%s: Retrieving pNFS nfsv4 device list\n", __FUNCTION__);

    op = op_alloc();
    if (op == NULL)
	return -ENOMEM;

    op->upcall.type = PVFS2_VFS_OP_GETDEVLIST;
    op->upcall.req.getdevlist.refn = pvfs2_inode->refn;
    status = service_operation(op, "pvfs2_getdevlist", 0,
			       get_interruptible_flag((&pvfs2_inode->vfs_inode)));
    pvfs2_print("pvfs2_getdevlist got return value of %d\n",status);

    if (status == 0 && op->downcall.resp.getdevlist.devlist_size > 0)
    {
	/* DH: Copy devlist blob for pNFS */
	pvfs2_print("%s: Server copy devicelist from userland size: %d\n",
		    __FUNCTION__,op->downcall.resp.getdevlist.devlist_size);
	pvfs2_sb->pnfs_devlist_size = op->downcall.resp.getdevlist.devlist_size;
	memcpy(pvfs2_sb->pnfs_devlist, op->downcall.resp.getdevlist.devlist, pvfs2_sb->pnfs_devlist_size);
    }
    else
    {
	pvfs2_error("%s: Error!  Could not retrieve device list (%d)\n",__FUNCTION__, status);
	status = -ENOSYS;
    }

    op_release(op);
    return status;
}

/* Retrieves pvfs2 data layout information about the specified file.
 * return- positive 0
 * negative -ENOSYS or pvfs2_inode_getattr error
 */
static int
nfs_layout_get(struct inode * inode, void* buf)
{
    int ret, devlist_len=0;
    struct nfsd4_pnfs_layoutget *layout_request = (struct nfsd4_pnfs_layoutget*)buf;
    pvfs2_inode_t* pvfs2_inode = PVFS2_I(inode);
    pvfs2_sb_info_t* pvfs2_sb = PVFS2_SB(inode->i_sb);
    char* buffer;

    pvfs2_print("%s: off:%Lu ex:%Lu macc:%d iomode:%d\n", __FUNCTION__,
		layout_request->lg_offset,
		layout_request->lg_length,
		layout_request->lg_mxcnt,
		layout_request->lg_iomode);
    if ((ret = nfs_getdevlist_upcall(pvfs2_sb, pvfs2_inode)) < 0)
	return ret;
    buffer = pvfs2_sb->pnfs_devlist;
    pnfs_decode_int32_t(&buffer, &devlist_len);
    ret = nfs_build_layout(layout_request, devlist_len);
    if (ret)
	pvfs2_error("%s: Error!  Could not copy attributes (%d)\n",__FUNCTION__,ret);

    return ret;
}

/* Convert a encode char buffer into an array of devices.
 * The devices are then freed as they are encoded by nfsd.
*/
struct nfsd4_pnfs_devlist*
nfs_create_devices(int devlist_len, char* buf)
{
    struct nfsd4_pnfs_devlist* devlist;
    int i;
    struct pnfs_filelayout_devaddr *fdev;
    char netid[] = "tcp";
    char nfsport[] = ".8.1";
    char* temp;

    pvfs2_print("%s: Start Devs: %d\n", __FUNCTION__, devlist_len);
    devlist = (struct nfsd4_pnfs_devlist*)kmalloc(devlist_len * sizeof(struct nfsd4_pnfs_devlist), PVFS2_GFP_FLAGS);
    /* todo: ensure space alocated */
    for (i=0; i < devlist_len; i++)
    {
	devlist[i].dev_id = i;

	fdev = (struct pnfs_filelayout_devaddr*)kmalloc(
	    sizeof(struct pnfs_filelayout_devaddr), PVFS2_GFP_FLAGS);
	/* todo ensure space allocated */
	fdev->r_netid.len = 3;
	fdev->r_netid.data = (char*)kmalloc(3, PVFS2_GFP_FLAGS);
	memcpy(fdev->r_netid.data, netid, 3);

	fdev->r_addr.len = bmitoh32(*(int32_t*)buf);
	fdev->r_addr.data = (char*)kmalloc(fdev->r_addr.len + 4, PVFS2_GFP_FLAGS);
	pnfs_decode_string(&buf, &temp);
	memcpy(fdev->r_addr.data, temp, fdev->r_addr.len);

	/* add port */
	memcpy(fdev->r_addr.data + fdev->r_addr.len, nfsport, 4);
	/* Increase by 4 to add the nfs port 2049 */
	fdev->r_addr.len += 4;

	pvfs2_print("%s: raddrlen: %d raddr: %s\n",
		    __FUNCTION__, fdev->r_addr.len, fdev->r_addr.data);
	devlist[i].dev_addr = (void*)fdev;
    }

    pvfs2_print("%s: End\n", __FUNCTION__);
    return devlist;
}

static int
nfs_getdevicelist(struct super_block *sb, void *buf)
{
    int ret, devlist_len=0;
    pvfs2_sb_info_t* pvfs2_sb = PVFS2_SB(sb);
    struct inode* inode = sb->s_root->d_inode;
    pvfs2_inode_t* pvfs2_inode = PVFS2_I(inode);
    struct nfsd4_pnfs_getdevlist *gdevl = (struct nfsd4_pnfs_getdevlist*)buf;
    char* buffer;

    pvfs2_print("%s: Start\n", __FUNCTION__);

    if ((ret = nfs_getdevlist_upcall(pvfs2_sb, pvfs2_inode)) < 0)
	return ret;

    buffer = pvfs2_sb->pnfs_devlist;
    pnfs_decode_int32_t(&buffer, &devlist_len);
    gdevl->gd_devlist = nfs_create_devices(devlist_len, buffer);
    gdevl->gd_devlist_len = devlist_len;

    pvfs2_print("%s: End (ret: %d) (len: %d)\n", __FUNCTION__, ret, devlist_len);

    return ret;
}

/* Retrieves pvfs2 data layout information about the specified file.
 * return- positive 0
 * negative -ENOSYS or pvfs2_inode_getattr error
 */
static int
nfsmanual_layout_get(struct inode * inode, void* buf)
{
    int ret;
    struct nfsd4_pnfs_layoutget *layout_request = (struct nfsd4_pnfs_layoutget*)buf;
    pvfs2_print("%s: off:%Lu ex:%Lu macc:%d iomode:%d\n", __FUNCTION__,
		layout_request->lg_offset,
		layout_request->lg_length,
		layout_request->lg_mxcnt,
		layout_request->lg_iomode);
    ret = nfs_build_layout(layout_request, nfsmanual_num_devices);
    if (ret)
	pvfs2_error("%s: Error!  Could not copy attributes (%d)\n",__FUNCTION__,ret);

    return ret;
}

/* Generate nfs file device list from devices specified in the /proc fs */
static int
nfsmanual_getdevicelist(struct super_block *sb, void *buf)
{
    int i=0;
    struct nfsd4_pnfs_getdevlist *gdevl = (struct nfsd4_pnfs_getdevlist*)buf;
    struct nfsd4_pnfs_devlist* devlist;
    struct pnfs_filelayout_devaddr *fdev;
    char netid[] = "tcp";
    char nfsport[] = ".8.1";
    char* t1, *t2;
    char devs[PNFS_DATASERVER_LEN];

    pvfs2_print("%s: Start\n", __FUNCTION__);
    pvfs2_print("%s: layout_ds string %s\n", __FUNCTION__, layout_dsnames);

    /* Use at most 8 (number of iota machines) data servers */
    devlist = (struct nfsd4_pnfs_devlist*)kmalloc(8 * sizeof(struct nfsd4_pnfs_devlist), PVFS2_GFP_FLAGS);

    /* copy devices from proc variable to ensure we don't modify
     * (which seemed to be happened via strsep)
     */
    memcpy(devs, layout_dsnames, PNFS_DATASERVER_LEN);
    t2 = devs;

    /* todo: ensure space alocated */
    while ((t1 = strsep(&t2, ","))) {
	if (!*t1)
	    continue;
	pvfs2_print("%s: Adding device %s\n", __FUNCTION__, t1);
	devlist[i].dev_id = i;

	fdev = (struct pnfs_filelayout_devaddr*)kmalloc(
	    sizeof(struct pnfs_filelayout_devaddr), PVFS2_GFP_FLAGS);
	/* todo ensure space allocated */
	fdev->r_netid.len = 3;
	fdev->r_netid.data = (char*)kmalloc(3, PVFS2_GFP_FLAGS);
	memcpy(fdev->r_netid.data, netid, 3);

	fdev->r_addr.len = strlen(t1);
	fdev->r_addr.data = (char*)kmalloc(fdev->r_addr.len + 4, PVFS2_GFP_FLAGS);
	memcpy(fdev->r_addr.data, t1, fdev->r_addr.len);

	/* add port */
	memcpy(fdev->r_addr.data + fdev->r_addr.len, nfsport, 4);
	/* Increase by 4 to add the nfs port 2049 */
	fdev->r_addr.len += 4;

	pvfs2_print("%s: raddrlen: %d raddr: %s\n",
		    __FUNCTION__, fdev->r_addr.len, fdev->r_addr.data);
	devlist[i].dev_addr = (void*)fdev;
	i++;
    }

    pvfs2_print("%s: End\n", __FUNCTION__);

    gdevl->gd_devlist = devlist;
    gdevl->gd_devlist_len = i;

    /* Set number of devices for layoutget.  This is
     * ok since a client must always retrieve a list of
     * devices before it retrieves the layout */
    nfsmanual_num_devices = i;

    pvfs2_print("%s: End (len: %d)\n", __FUNCTION__, gdevl->gd_devlist_len);

    return 0;
}

/* export ops for each layout type */

/* pvfs2 layout: the layout is an opaque blob, so this table supplies its
 * own free and encode callbacks.  No get_devicelist callback is set.
 */
struct export_operations pvfs2layout_export_ops =
{
    .layout_type   = pvfs2_layout_type,
    .layout_get    = pvfs2_layout_get,
    .layout_free   = pvfs2_layout_free,
    .layout_encode = pvfs2_layout_encode,
};

/* nfsv4 file layout whose device list is obtained with the
 * PVFS2_VFS_OP_GETDEVLIST upcall.  No layout_free or layout_encode
 * callback is set here -- presumably nfsd handles file layouts itself;
 * TODO confirm.
 */
struct export_operations nfslayout_export_ops =
{
    .layout_type    = pvfs2_layout_type,
    .layout_get     = nfs_layout_get,
    .get_devicelist = nfs_getdevicelist,
};

/* nfsv4 file layout whose device list is built from the data server
 * names configured through the /proc fs (layout_dsnames).
 */
struct export_operations nfsmanuallayout_export_ops =
{
    .layout_type    = pvfs2_layout_type,
    .layout_get     = nfsmanual_layout_get,
    .get_devicelist = nfsmanual_getdevicelist,
};

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */
