Josh-D. S. Davis

Xaminmo / Omnimax / Max Omni / Mad Scientist / Midnight Shadow / Radiation Master

Previous Entry Share Next Entry
zram / compcache / ramzswap (like Active Memory Expansion, but for Linux, not pHYP).
Josh 201604 KWP
joshdavis
Linux's compcache (compressed cache) became ramzswap.
This is like "Active Memory Expansion" for the pSeries Hypervisor.
Linux's implementation is as an LZO compressed block device in kernel RAM.

As development went on, there was no need to limit this to paging space. Now, it's called zram. It's not super super stable, but it seems to work well enough when you leave it alone.

Linux already supports "priority" for paging spaces (i.e., they are used hierarchically, in order from best to worst). As such, backing devices are moot anyway. We'll just store non-compressible junk in RAM. It's rare, and not any worse than if there were no LZO.

zram is single-threaded, which is disappointing; however, its performance is about 50% of RAM, and about 800% of disk. You're also allowed to run multiple devices at the same priority, and each device will sit on its own core.


[root@ns1:/etc/iet]
/bin/bash# modprobe zram num_devices=2

[root@ns1:/etc/iet]
/bin/bash# ll /dev/zram*
brw-rw---- 1 root disk 252, 0 Feb  7 00:57 /dev/zram0
brw-rw---- 1 root disk 252, 1 Feb  7 00:57 /dev/zram1

[root@ns1:/etc/iet]
/bin/bash# echo $((1024*1024*1024)) > /sys/block/zram0/disksize

[root@ns1:/etc/iet]
/bin/bash# echo $((1024*1024*256)) > /sys/block/zram1/disksize

[root@ns1:/etc/iet]
/bin/bash# mkfs.ext4 /dev/zram1
mke2fs 1.41.12 (17-May-2010)
Filesystem label=
OS type: Linux
Block size=4096 (log=2)
Fragment size=4096 (log=2)
Stride=1 blocks, Stripe width=1 blocks
65536 inodes, 65536 blocks
3276 blocks (5.00%) reserved for the super user
First data block=0
Maximum filesystem blocks=67108864
2 block groups
32768 blocks per group, 32768 fragments per group
32768 inodes per group
Superblock backups stored on blocks:
        32768

Writing inode tables: done
Creating journal (4096 blocks): done
Writing superblocks and filesystem accounting information: done

This filesystem will be automatically checked every 26 mounts or
180 days, whichever comes first.  Use tune2fs -c or -i to override.

[root@ns1:/etc/iet]
/bin/bash# mkswap /dev/zram0
Setting up swapspace version 1, size = 1048572 KiB
no label, UUID=458902bd-f10c-4d33-93df-6c0f40ed4cdc

[root@ns1:/etc/iet]
/bin/bash# swapon /dev/zram0 -p10

[root@ns1:/etc/iet]
/bin/bash# swapon -s
Filename                                Type            Size    Used    Priority
/dev/dm-11                              partition       4194300 0       -1
/dev/zram0                              partition       1048572 0       10

[root@ns1:/etc/iet]
/bin/bash# top
top - 01:00:24 up  1:45,  2 users,  load average: 4.42, 4.32, 4.56
Tasks: 242 total,   2 running, 240 sleeping,   0 stopped,   0 zombie
Cpu0  :  0.0%us, 13.9%sy,  0.0%ni, 35.3%id, 47.9%wa,  0.0%hi,  3.0%si,  0.0%st
Cpu1  :  0.3%us, 16.6%sy,  0.0%ni,  1.7%id, 81.1%wa,  0.0%hi,  0.3%si,  0.0%st
Mem:   3632384k total,  3515148k used,   117236k free,    57824k buffers
Swap:  5242872k total,        0k used,  5242872k free,  3237636k cached
[SNIP]

[root@ns1:/etc/iet]
/bin/bash# mount /dev/zram1 /mnt

[root@ns1:/etc/iet]
/bin/bash# dd if=/dev/zero of=/mnt/deleteme bs=256k

[root@ns1:/etc/iet]
/bin/bash# umount /mnt

[root@ns1:/root]
/bin/bash# cd /sys/block/zram1

[root@ns1:/sys/block/zram1]
/bin/bash# ll
total 0
drwxr-xr-x  6 root root    0 Feb  7 01:24 ./
drwxr-xr-x 20 root root    0 Feb  7 01:24 ../
-r--r--r--  1 root root 4096 Feb  7 01:24 alignment_offset
lrwxrwxrwx  1 root root    0 Feb  7 01:24 bdi -> ../../bdi/252:1/
-r--r--r--  1 root root 4096 Feb  7 01:24 capability
-r--r--r--  1 root root 4096 Feb  7 01:24 compr_data_size
-r--r--r--  1 root root 4096 Feb  7 01:24 dev
-r--r--r--  1 root root 4096 Feb  7 01:24 discard
-r--r--r--  1 root root 4096 Feb  7 01:24 discard_alignment
-rw-r--r--  1 root root 4096 Feb  7 00:58 disksize
-r--r--r--  1 root root 4096 Feb  7 01:24 ext_range
drwxr-xr-x  2 root root    0 Feb  7 01:24 holders/
-r--r--r--  1 root root 4096 Feb  7 01:24 inflight
-r--r--r--  1 root root 4096 Feb  7 01:24 initstate
-r--r--r--  1 root root 4096 Feb  7 01:24 invalid_io
-r--r--r--  1 root root 4096 Feb  7 01:24 mem_used_total
-r--r--r--  1 root root 4096 Feb  7 01:24 notify_free
-r--r--r--  1 root root 4096 Feb  7 01:24 num_reads
-r--r--r--  1 root root 4096 Feb  7 01:24 num_writes
-r--r--r--  1 root root 4096 Feb  7 01:24 orig_data_size
drwxr-xr-x  2 root root    0 Feb  7 01:24 power/
drwxr-xr-x  2 root root    0 Feb  7 01:24 queue/
-r--r--r--  1 root root 4096 Feb  7 01:24 range
-r--r--r--  1 root root 4096 Feb  7 01:24 removable
--w-------  1 root root 4096 Feb  7 01:24 reset
-r--r--r--  1 root root 4096 Feb  7 01:24 ro
-r--r--r--  1 root root 4096 Feb  7 01:24 size
drwxr-xr-x  2 root root    0 Feb  7 01:24 slaves/
-r--r--r--  1 root root 4096 Feb  7 01:24 stat
lrwxrwxrwx  1 root root    0 Feb  7 01:24 subsystem -> ../../../../class/block/
-rw-r--r--  1 root root 4096 Feb  7 01:24 uevent
-r--r--r--  1 root root 4096 Feb  7 01:24 zero_pages

[root@ns1:/sys/block/zram1]
/bin/bash# cat size
524288

[root@ns1:/sys/block/zram1]
/bin/bash# cat disksize
268435456

[root@ns1:/sys/block/zram1]
/bin/bash# cat compr_data_size
3738

[root@ns1:/sys/block/zram1]
/bin/bash# cat mem_used_total
8192

[root@ns1:/sys/block/zram1]
/bin/bash# echo 0 > disksize
-su: echo: write error: Device or resource busy

[root@ns1:/sys/block/zram1]
/bin/bash# echo 1 > discard
-su: discard: Permission denied

[root@ns1:/sys/block/zram1]
/bin/bash# echo 1 > reset

[root@ns1:/sys/block/zram1]
/bin/bash# cat disksize
929792000

[root@ns1:/sys/block/zram1]
/bin/bash# cat mem_used_total
0

[root@ns1:/sys/block/zram1]
/bin/bash# cat stat
       0        0        0        0        0        0        0        0        0        0        0

[root@ns1:/sys/block/zram1]
/bin/bash# cat zero_pages
Killed

Message from syslogd@ns1 at Mon Feb  7 01:26:02 2011 ...
ns1 kernel: last sysfs file: /sys/devices/virtual/block/zram1/zero_pages

Message from syslogd@ns1 at Mon Feb  7 01:26:02 2011 ...
ns1 kernel: Process cat (pid: 15803, ti=f1302000 task=f29d1c70 task.ti=f1302000)

Message from syslogd@ns1 at Mon Feb  7 01:26:02 2011 ...
ns1 kernel: Oops: 0000 [#1] PREEMPT SMP

Message from syslogd@ns1 at Mon Feb  7 01:26:02 2011 ...
ns1 kernel: CR2: 0000000034c78048

Message from syslogd@ns1 at Mon Feb  7 01:26:02 2011 ...
ns1 kernel: EIP: [] zram_get_stat+0x5b/0xb0 [zram] SS:ESP 0068:f1303eec

Message from syslogd@ns1 at Mon Feb  7 01:26:02 2011 ...
ns1 kernel: Stack:

Message from syslogd@ns1 at Mon Feb  7 01:26:02 2011 ...
ns1 kernel: Code: 00 a1 88 de 55 c0 83 c1 01 e8 82 74 3a c8 3b 05 00 85 70 c0 89 44 24 0c 7d 34 8b 04 24 8b 48 30 8b 44 24 0c 03 0c 85 00 8a 70 c0 <8b> 41 48 eb 02 89 d0 a8 01 75 30 8b 1c f9 8b 74 f9 04 8b 51 48

Message from syslogd@ns1 at Mon Feb  7 01:26:02 2011 ...
ns1 kernel: Call Trace:

[root@ns1:/sys/block/zram1]
/bin/bash# dmesg | tail -32
r8169 0000:05:02.0: eth1: link up
EXT4-fs (zram1): mounted filesystem with ordered data mode. Opts: (null)
Cannot change disksize for initialized device
BUG: unable to handle kernel paging request at 34c78048
IP: [] zram_get_stat+0x5b/0xb0 [zram]
*pde = 00000000
Oops: 0000 [#1] PREEMPT SMP
last sysfs file: /sys/devices/virtual/block/zram1/zero_pages
Modules linked in: zram(C) ppdev lp af_packet ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi crc32c vmnet parport_pc parport vmblock vsock vmci vmmon acpi_cpufreq mperf cpufreq_stats cpufreq_conservative cpufreq_userspace cpufreq_powersave cpufreq_ondemand freq_table microcode nfsd exportfs nfs fscache nfs_acl auth_rpcgss ipv6 bridge stp deflate ctr twofish_i586 twofish_common camellia serpent blowfish cast5 des_generic cbc aes_i586 aes_generic xcbc rmd160 sha512_generic sha256_generic hmac crypto_null af_key ext4 jbd2 crc16 dm_snapshot dm_mirror dm_region_hash dm_log coretemp fuse raid10 raid1 raid0 nf_nat_pptp nf_conntrack_pptp nf_conntrack_proto_gre nf_nat_proto_gre iptable_nat ip_tables nf_nat x_tables nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 sha1_generic arc4 ecb ppp_mppe ppp_deflate zlib_deflate bsd_comp ppp_generic slhc vfat fat lockd sunrpc radeon ttm drm_kms_helper drm dcdbas rtc_cmos i3000_edac agpgart rng_core rtc_core pcspkr psmouse serio_raw evdev tpm_tis i2c_algo_bit rtc_lib edac_core tpm i2c_i801 tpm_bios i2c_core shpchp pci_hotplug button processor ext3 jbd mbcache dm_mod raid456 md_mod async_raid6_recov async_pq raid6_pq async_xor xor async_memcpy async_tx sg usbhid sr_mod sd_mod hid cdrom pata_acpi ata_generic ata_piix sata_sil24 tg3 uhci_hcd libata r8169 mii ehci_hcd [last unloaded: scsi_wait_scan]

Pid: 15803, comm: cat Tainted: G        WC  2.6.37-0.dmz.5-liquorix-686 #1 0YH299/PowerEdge SC440
EIP: 0060:[] EFLAGS: 00010206 CPU: 1
EIP is at zram_get_stat+0x5b/0xb0 [zram]
EAX: 00000000 EBX: 00000000 ECX: 34c78000 EDX: 00000000
ESI: 00000000 EDI: 00000006 EBP: f1303f08 ESP: f1303eec
 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
Process cat (pid: 15803, ti=f1302000 task=f29d1c70 task.ti=f1302000)
Stack:
 f29f6234 00000000 00000000 00000000 f2230000 00000001 f19ab650 00000002
 f8002080 00000000 00000000 00000000 00020000 f8003274 fffffffb f8002030
 f21fe5d4 c0449921 f21fe5c0 f1303f9c c058802c c0350d19 00000000 f4cf6580
Call Trace:
 [] ? zero_pages_show+0x50/0x70 [zram]
 [] ? zero_pages_show+0x0/0x70 [zram]
 [] ? dev_attr_show+0x21/0x50
 [] ? sysfs_read_file+0x99/0x140
 [] ? sysfs_read_file+0x0/0x140
 [] ? vfs_read+0x9d/0x160
 [] ? sys_read+0x41/0x80
 [] ? sysenter_do_call+0x12/0x28
Code: 00 a1 88 de 55 c0 83 c1 01 e8 82 74 3a c8 3b 05 00 85 70 c0 89 44 24 0c 7d 34 8b 04 24 8b 48 30 8b 44 24 0c 03 0c 85 00 8a 70 c0 <8b> 41 48 eb 02 89 d0 a8 01 75 30 8b 1c f9 8b 74 f9 04 8b 51 48
EIP: [] zram_get_stat+0x5b/0xb0 [zram] SS:ESP 0068:f1303eec
CR2: 0000000034c78048
---[ end trace b5622529bbc31508 ]---

[root@ns1:/sys/block/zram1]
/bin/bash#

?

Log in

No account? Create an account