[root@clei22 ~]# lscpu
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 16
On-line CPU(s) list: 0-15
Thread(s) per core: 1
Core(s) per socket: 8
Socket(s): 2
NUMA node(s): 4
Vendor ID: AuthenticAMD
CPU family: 16
Model: 9
Model name: AMD Opteron(tm) Processor 6128
Stepping: 1
CPU MHz: 2000.000
BogoMIPS: 4000.38
Virtualization: AMD-V
L1d cache: 64K
L1i cache: 64K
L2 cache: 512K
L3 cache: 5118K
NUMA node0 CPU(s): 0-3
NUMA node1 CPU(s): 4-7
NUMA node2 CPU(s): 12-15
NUMA node3 CPU(s): 8-11
[root@clei22 ~]# lsscsi
[2:0:0:0] disk ATA INTEL SSDSC2CW24 400i /dev/sda
[3:0:0:0] disk ATA HGST HUS724040AL AA70 /dev/sdb
[4:0:0:0] disk ATA WDC WD2002FYPS-0 1G01 /dev/sdc
[root@clei22 ~]# pvs ;vgs;lvs
PV VG Fmt Attr PSize PFree
/dev/mapper/INTEL_SSDSC2CW240A3_CVCV306302RP240CGN vg_cache lvm2 a-- 223.57g 0
/dev/sdc2 centos_clei22 lvm2 a-- 1.82t 64.00m
VG #PV #LV #SN Attr VSize VFree
centos_clei22 1 3 0 wz--n- 1.82t 64.00m
vg_cache 1 2 0 wz--n- 223.57g 0
LV VG Attr LSize Pool Origin Data% Meta% Move Log Cpy%Sync Convert
home centos_clei22 -wi-ao---- 1.74t
root centos_clei22 -wi-ao---- 50.00g
swap centos_clei22 -wi-ao---- 31.44g
lv_cache vg_cache -wi-ao---- 213.57g
lv_slog vg_cache -wi-ao---- 10.00g
[root@clei22 ~]# zpool status -v
pool: zclei22
state: ONLINE
scan: scrub repaired 0 in 0h0m with 0 errors on Tue Feb 28 14:16:07 2017
config:
NAME STATE READ WRITE CKSUM
zclei22 ONLINE 0 0 0
HGST_HUS724040ALA640_PN2334PBJ4SV6T1 ONLINE 0 0 0
logs
lv_slog ONLINE 0 0 0
cache
lv_cache ONLINE 0 0 0
errors: No known data errors
ZFS config:
[root@clei22 ~]# zfs get all zclei22/01
NAME PROPERTY VALUE SOURCE
zclei22/01 type filesystem -
zclei22/01 creation Tue Feb 28 14:06 2017 -
zclei22/01 used 389G -
zclei22/01 available 3.13T -
zclei22/01 referenced 389G -
zclei22/01 compressratio 1.01x -
zclei22/01 mounted yes -
zclei22/01 quota none default
zclei22/01 reservation none default
zclei22/01 recordsize 128K local
zclei22/01 mountpoint /zclei22/01 default
zclei22/01 sharenfs off default
zclei22/01 checksum on default
zclei22/01 compression off local
zclei22/01 atime on default
zclei22/01 devices on default
zclei22/01 exec on default
zclei22/01 setuid on default
zclei22/01 readonly off default
zclei22/01 zoned off default
zclei22/01 snapdir hidden default
zclei22/01 aclinherit restricted default
zclei22/01 canmount on default
zclei22/01 xattr sa local
zclei22/01 copies 1 default
zclei22/01 version 5 -
zclei22/01 utf8only off -
zclei22/01 normalization none -
zclei22/01 casesensitivity sensitive -
zclei22/01 vscan off default
zclei22/01 nbmand off default
zclei22/01 sharesmb off default
zclei22/01 refquota none default
zclei22/01 refreservation none default
zclei22/01 primarycache metadata local
zclei22/01 secondarycache metadata local
zclei22/01 usedbysnapshots 0 -
zclei22/01 usedbydataset 389G -
zclei22/01 usedbychildren 0 -
zclei22/01 usedbyrefreservation 0 -
zclei22/01 logbias latency default
zclei22/01 dedup off default
zclei22/01 mlslabel none default
zclei22/01 sync disabled local
zclei22/01 refcompressratio 1.01x -
zclei22/01 written 389G -
zclei22/01 logicalused 396G -
zclei22/01 logicalreferenced 396G -
zclei22/01 filesystem_limit none default
zclei22/01 snapshot_limit none default
zclei22/01 filesystem_count none default
zclei22/01 snapshot_count none default
zclei22/01 snapdev hidden default
zclei22/01 acltype off default
zclei22/01 context none default
zclei22/01 fscontext none default
zclei22/01 defcontext none default
zclei22/01 rootcontext none default
zclei22/01 relatime off default
zclei22/01 redundant_metadata all default
zclei22/01 overlay off default
File content:
2x50GB VM thin-provisioned
4x100GB VMdisk preallocated
Gluster Volume config:
[root@clei22 ~]# gluster volume info
Volume Name: GluReplica
Type: Replicate
Volume ID: ee686dfe-203a-4caa-a691-26353460cc48
Status: Started
Snapshot Count: 0
Number of Bricks: 1 x (2 + 1) = 3
Transport-type: tcp,rdma
Bricks:
Brick1: 10.10.10.44:/zclei22/01/glu
Brick2: 10.10.10.42:/zclei21/01/glu
Brick3: 10.10.10.41:/zclei26/01/glu (arbiter)
Options Reconfigured:
network.ping-timeout: 30
performance.readdir-ahead: on
nfs.disable: on
performance.quick-read: off
performance.read-ahead: off
performance.io-cache: off
performance.stat-prefetch: off
cluster.eager-lock: enable
network.remote-dio: enable
cluster.quorum-type: auto
cluster.server-quorum-type: server
features.shard: on
cluster.data-self-heal-algorithm: full
storage.owner-uid: 36
storage.owner-gid: 36
server.allow-insecure: on
GLUSTER primitive TESTS COLD:
time find /zclei22/01/glu/ -type d | wc -l
55473
real 5m58.248s
user 0m0.752s
sys 0m7.649s
[root@clei22 ~]# time find /zclei22/01/glu/ -type f | wc -l
215011
real 6m11.178s
user 0m0.873s
sys 0m9.385s
GLUSTER primitive TESTS WARM:
time find /zclei22/01/glu/ -type d | wc -l
55473
real 0m2.719s
user 0m0.400s
sys 0m2.323s
time find /zclei22/01/glu/ -type f | wc -l
215011
real 0m2.828s
user 0m0.478s
sys 0m2.376s
Adding Infiniband:
yum install ibutils.x86_64 rdma.noarch infiniband-diags.x86_64
yum install -y libmlx4.x86_64
ibstat
CA 'mlx4_0'
CA type: MT26428
Number of ports: 1
Firmware version: 2.7.700
Hardware version: b0
Node GUID: 0x002590ffff163758
System image GUID: 0x002590ffff16375b
Port 1:
State: Active
Physical state: LinkUp
Rate: 10
Base lid: 273
LMC: 0
SM lid: 3
Capability mask: 0x02590868
Port GUID: 0x002590ffff163759
Link layer: InfiniBand
Not bad for the old SDR switch ! :-P
qperf clei22.vib ud_lat ud_bw
ud_lat:
latency = 23.6 us
ud_bw:
send_bw = 981 MB/sec
recv_bw = 980 MB/sec
Let us kill and restore the glusterfs:
1) pull out /dev/sda
2) format to xfs
3) destroy xfs: zpool create ...etc...
4) recover old glusterfs config:
Terrible performance on zfs+glusterfs :(
[root@clei21 ~]# cat heal.sh
(vol=GluReplica; brick=/zclei21/01/glu;setfattr -n trusted.glusterfs.volume-id -v 0x$(grep volume-id /var/lib/glusterd/vols/$vol/info | cut -d= -f2 | sed 's/-//g') $brick)
gluster volume heal GluReplica full
Receiver Brick:
[root@clei21 ~]# arcstat.py 1
time read miss miss% dmis dm% pmis pm% mmis mm% arcsz c
13:24:49 0 0 0 0 0 0 0 0 0 4.6G 31G
13:24:50 154 80 51 80 51 0 0 80 51 4.6G 31G
13:24:51 179 62 34 62 34 0 0 62 42 4.6G 31G
13:24:52 148 68 45 68 45 0 0 68 45 4.6G 31G
13:24:53 140 64 45 64 45 0 0 64 45 4.6G 31G
13:24:54 124 48 38 48 38 0 0 48 38 4.6G 31G
13:24:55 157 80 50 80 50 0 0 80 50 4.7G 31G
13:24:56 202 68 33 68 33 0 0 68 41 4.7G 31G
13:24:57 127 54 42 54 42 0 0 54 42 4.7G 31G
13:24:58 126 50 39 50 39 0 0 50 39 4.7G 31G
13:24:59 116 40 34 40 34 0 0 40 34 4.7G 31G
Sender Brick:
[root@clei22 ~]# arcstat.py 1
time read miss miss% dmis dm% pmis pm% mmis mm% arcsz c
13:28:37 8 2 25 2 25 0 0 2 25 468M 31G
13:28:38 1.2K 727 62 727 62 0 0 525 54 469M 31G
13:28:39 815 508 62 508 62 0 0 376 55 469M 31G
13:28:40 994 624 62 624 62 0 0 450 54 469M 31G
13:28:41 783 456 58 456 58 0 0 338 50 470M 31G
13:28:42 916 541 59 541 59 0 0 390 50 470M 31G
13:28:43 768 437 56 437 57 0 0 313 48 471M 31G
13:28:44 877 534 60 534 60 0 0 393 53 470M 31G
13:28:45 957 630 65 630 65 0 0 450 57 470M 31G
13:28:46 819 479 58 479 58 0 0 357 51 471M 31G