Friday, April 27, 2012

Linux 10 - Process Text Streams using filters

 

cat, cut, expand, fmt, head, od, join, nl, paste, pr, sed, sort, split, tail, tr, unexpand, uniq, wc, xargs

cat
concatenate files and print on the standard output

Flags:

-n number all output lines
-T, --show-tabs display TAB characters as ^I
-E, --show-ends display $ at end of each line
# cat hello.txt hello2.txt 
This is a test
a big test
.
hello this is spaced by tabs
#
# cat hello.txt hello2.txt > hello3.txt ; cat hello3.txt
This is a test
a big test
.
hello this is spaced by tabs
#

cut
remove sections from each line of files

Flags:

-b, --bytes=LIST

select only these bytes

-c, --characters=LIST

select only these characters

-d, --delimiter=DELIM

use DELIM instead of TAB for field delimiter

-f, --fields=LIST

select only these fields
also print any line that contains no delimiter character,
unless the -s option is specified

# cut -c 2,3,4,5 hello.txt
his
big
# cut -b 1 hello.txt
T
a
.
# cut -b 1,2,3,4 hello.txt
This
a bi
.
# cut -b 1-6 hello.txt
This i
a big
.
# cut -f1 -d' ' hello.txt
This
a
.
# cut -f2 -d' ' hello.txt
is
big
.


expand
convert tabs to spaces

-i --initial

do not convert tabs after non blanks

-t --tabs=NUMBER have tabs NUMBER characters apart, not 8
-t --tabs=LIST use comma separated list of explicit tab positions

fmt
simple optimal text formatter

# cat hello.txt ; fmt hello.txt ; fmt -w 5 hello.txt
This is a test
a big test
.
This is a test a big test
This
is a
test
a
big
test
.
#

head
output the first part of files

# head /var/log/syslog
Feb 24 12:32:46 debian rsyslogd: [origin software="rsyslogd" swVersion="4.6.4"
x-pid="1106" x-info="http://www.rsyslog.com"] rsyslogd was HUPed, type 'lightweight'.
Feb 24 12:33:26 debian anacron[1229]: Job `cron.daily' terminated
Feb 24 12:33:26 debian anacron[1229]: Normal exit (1 job run)
Feb 24 12:35:23 debian mptstatusd: detected nonoptimal RAID status
Feb 24 12:40:21 debian dhclient: DHCPREQUEST on eth0 to 192.168.189.254 port 67
Feb 24 12:40:21 debian dhclient: DHCPACK from 192.168.189.254
Feb 24 12:40:21 debian NetworkManager[1643]: (eth0): DHCPv4 state changed bound -> renew

 

# head -n 2 /var/log/syslog
Feb 24 12:32:46 debian rsyslogd: [origin software="rsyslogd" swVersion="4.6.4"
x-pid="1106" x-info="http://www.rsyslog.com"] rsyslogd was HUPed, type 'lightweight'.
Feb 24 12:33:26 debian anacron[1229]: Job `cron.daily' terminated
#

od
dump files in octal and other formats

# od hello.txt
0000000 064124 071551 064440 020163 020141 062564 072163 060412
0000020 061040 063551 072040 071545 005164 005056
0000034
# od -c hello.txt
0000000 T h i s i s a t e s t \n a
0000020 b i g t e s t \n . \n
0000034
#

join
join lines of two files on a common field

# cat 1.txt
10 blue
20 red
30 purple
40 orange
50 black
# cat 2.txt
10 socks
20 shirts
30 pants
40 hats
50 gloves
# join 1.txt 2.txt
10 blue socks
20 red shirts
30 purple pants
40 orange hats
50 black gloves
#

nl
number lines of files

# nl hello.txt
1 This is a test
2 a big test
3 .
# nl 2.txt
1 10 socks
2 20 shirts
3 30 pants
4 40 hats
5 50 gloves
#

paste
merge lines of files

# cat 1.txt
10 blue
20 red
30 purple
40 orange
50 black
# cat 2.txt
10 socks
20 shirts
30 pants
40 hats
50 gloves
# paste 1.txt 2.txt
10 blue 10 socks
20 red 20 shirts
30 purple 30 pants
40 orange 40 hats
50 black 50 gloves
#

pr
convert text files for printing

# pr hello.txt
2012-02-24 15:25 hello.txt Page 1
This is a test
a big test
.

sed
stream editor for filtering and transforming text

# cat 2.txt
10 socks
20 shirts
30 pants
40 hats
50 gloves
# sed -e 's/pants/dresses/' 2.txt
10 socks
20 shirts
30 dresses
40 hats
50 gloves
# sed -e 's/0/chicken/' 2.txt
1chicken socks
2chicken shirts
3chicken pants
4chicken hats
5chicken gloves
# sed -e 's/0/chicken/' 2.txt | sed -e's/cken/mp/'
1chimp socks
2chimp shirts
3chimp pants
4chimp hats
5chimp gloves

sort
sort lines of text files

  • normal sort
# sort hello.txt 
.
a big test
This is a test
  • reverse sort
# sort -r hello.txt 
This is a test
a big test
.
  • random sort
# sort -R hello.txt 
a big test
This is a test
.
root@debian:~#

split
splits a file into pieces

-- by number of bytes
-- by number of lines
-- helps breaking up things into more manageable files

split -l 2 1.txt
splits into 2 lines per file

split -b 5 1.txt
splits into 5 bytes per file

tail
output the last part of files

 

  1. tail /var/log/dmesg
  2. tail -n ## /var/log/dmesg
  3. tail -f /var/log/dmesg
  4. tail -c ## /var/log/dmesg

 

  1. shows last 10 lines
  2. shows last n lines
  3. shows the last 10 lines, then keeps following the file and prints new lines as they are appended
  4. shows last X bytes

tr
translate or delete characters

tr -t ABCDEFGHIJKLMNOPQRSTUVWXYZ a-z
tr -t L l
tr -t ABCDEFGHILJKL afasdLfsdfasdfsdf
tr -d L
tr -s L

1. translates characters
2. same as above
3. gibberish
4. delete
5. squeeze same characters ( make ll into a single l )

root@debian:~# echo 'HELLO' | tr -t ABCDEFGHILJKL afasdfsdfasdfsdf
ddffO
root@debian:~# echo 'HELLO' | tr -t ABCDEFGHIJKLMNOPQRSTUVWXYZ a-z
hello
root@debian:~# echo 'HELLO' | tr -t L l
HEllO
root@debian:~# echo 'HELLO' | tr -d L
HEO
root@debian:~# echo 'HELLO' | tr -s L
HELO

unexpand

  • by default, unexpand converts only the blanks at the beginning of each line to tabs
  • unexpand -a converts all blanks, not just the leading ones, to tabs instead of spaces
  • you can set how long a tab is with -t

uniq

if there are adjacent duplicate lines, it will show you only one of them
-c will count and show the occurrences
-u only show single values that are not repeated
-d only show duplicated values

example:
uniq -d /var/log/messages shows repeated errors or log events


wc
print newline, word, and byte counts for each file
the default output n n n stands for “lines, words, bytes”
-l only lines
wc * will show the counts for all the files in the dir

xargs
build and execute command lines from standard input


No comments:

Post a Comment