version upgrade

Got rid of that horrible banlist array/function thing; the ban lists are now hard coded. On a dual core system, this saves literally 0.1 seconds on execution, about 10-20% of execution time. This also fixes a bug that has always existed, which shows up if you use spaces as the replacement instead of ''. Another advantage of this new method is that it fixes the fact that each and every string character would be considered a match, null replaced with null. This was due to the fact that each banlist string started with |, i.e., it contained an empty (null) alternative in the search pattern... sigh. This is why we don't use stupid bash tricks, ok people? They hide bugs, and make the code write-only.
2024-11-17 00:31:19 +00:00 · 2011-06-20 21:58:17 +00:00 · 2011-06-20 21:58:17 +00:00 · 06cc997157
parent 6024b3b387
commit 06cc997157
1 changed files with 35 additions and 75 deletions
--- a/108
+++ b/108
@ -1,7 +1,7 @@
 #!/bin/bash
 ########################################################################
 ####  Script Name: inxi
-####  version: 1.7.4
+####  version: 1.7.5
 ####  Date: June 20 2011
 ####  Patch Number: 00
 ########################################################################
@ -57,7 +57,7 @@
 ####	http://lists.gnu.org/archive/html/bug-bash/2004-08/msg00144.html
 ####  Bash 3.1 for proper array use
 ####
-####	Arrays work in bash 2.05b, but "egrep -m" does not
+####	Arrays work in bash 2.05b, but "grep -Em" does not
 ####
 ####  RECOMMENDS (Needed to run certain features, listed by option)
 ####  -A - for output of usb audio information: lsusb (usbutils)
@ -112,19 +112,20 @@
 ####  Arrays should start with a_ (local) or A_ (global).
 ####
 ####  SPECIAL NOTES:
-####  The color variable ${C2} must always be followed by a space unless you know what
-####  character is going to be next for certain. Otherwise irc color codes can be accidentally
-####  activated or altered.
-####
-####  For native script konversation support (check distro for correct konvi scripts path):
-####  ln -s <path to inxi> /usr/share/apps/konversation/scripts/inxi
-####  DCOP doesn't like \n, so avoid using it for most output unless required, as in error messages.
+####  * The color variable ${C2} must always be followed by a space unless you know what
+####    character is going to be next for certain. Otherwise irc color codes can be accidentally
+####    activated or altered.
+####  * For native script konversation support (check distro for correct konvi scripts path):
+####    ln -s <path to inxi> /usr/share/apps/konversation/scripts/inxi
+####    DCOP doesn't like \n, so avoid using it for most output unless required, as in error messages.
+####  * print_screen_output " " # requires a space, not null, to avoid an error in, for example, irssi
+####  * For logging of array data, array must be placed into the temp_array, otherwise only the first key logs
+####  * In gawk search patterns, . is a wildcard EXCEPT inside bracket expressions like [0-9.], where it's a literal.
+####    So outside of bracketed items it must be escaped, \. but inside, there is no need. Outside of gawk it should
+####    be escaped in search patterns if you are using it as a literal.
 ####
 ####  As with all 'rules' there are acceptions, these are noted where used.
 ####
-####  print_screen_output " " # requires space, not null, to avoid error in for example in irssi
-####  For logging of array data, array must be placed into the temp_array, otherwise only the first key logs
-####
 ###################################################################################
 ####	KDE Konversation information.  Moving from dcop(qt3/KDE3) to dbus(qt4/KDE4)
 ###################################################################################
@ -481,15 +482,10 @@ DISTROS_LSB_GOOD="mandrake-release mandriva-release mandrakelinux-release"
 # Puppy Linux 4.1.2 (Bash 3.0: arrays won't work) --> works partially

 ### Bans Data
-# Precede a banword with $'\2' to prevent it from being subject to automated escaping by the make_ban_lists routine
-# $'\1' gets weird results :
-# user@host $ ARR=($'\x01'"one two" three four); echo ${ARR[0]} | hd -v
-# 00000000  01 01 6f 6e 65 20 74 77  6f 0a                    |..one two.|
-A_NORMAL_BANS=( computing computer corporation communications electronics electrical electric gmbh group industrial international revision software technologies technology $'\2'"\<ltd\>" ltd. ltd $'\2'"\<inc\>" intl. inc. $'\2'\<co\> co. corp. "(tm)" "(r)" "®" $'\2'"\(rev ..\)" )
-A_CPU_BANS=( @ cpu deca 'dual core' dual-core 'tri core' tri-core 'quad core' quad-core ennea genuine hepta hexa multi octa penta 'processor' processor single triple $'\2'"[0-9.]+ *[MmGg][Hh][Zz]" )
-# after processing, the ban arrays will be put into these:
-BAN_LIST_NORMAL=''
-BAN_LIST_CPU=''
+# Note that \<ltd\> bans only whole words, not parts of strings; inside \<corp\> you can't use punctuation characters like . or ,
+# We save about 10% of the exec time by hand building the ban lists here, using hard quotes. Every ban term is its own | separated alternative.
+BAN_LIST_NORMAL='computing|computer|corporation|communications|electronics|electrical|electric|gmbh|group|industrial|international|revision|software|technologies|technology|\<ltd\>|ltd\.|\<inc\>|inc\.|intl\.|\<co\>|co\.|corp\.|\(tm\)|\(r\)|®|\(rev ..\)'
+BAN_LIST_CPU='@|cpu|deca|dual core|dual-core|tri core|tri-core|quad core|quad-core|ennea|genuine|hepta|hexa|multi|octa|penta|processor|single|triple|[0-9\.]+ *[MmGg][Hh][Zz]'

 ### USB networking search string data, because some brands can have other products than
 ### wifi/nic cards, they need further identifiers, with wildcards.
@ -522,18 +518,6 @@ main()
 	check_script_depends
 	check_script_suggested_apps

-	# note: this needs to go AFTER depends check because these use gawk
-	# Do this after sourcing of config overrides so user can customize banwords
-	# Contrary to my previous belief, "${ARR[@]}" passes a quoted list, not one string
-	BAN_LIST_NORMAL=$( make_ban_lists "${A_NORMAL_BANS[@]}" )
-	BAN_LIST_CPU=$( make_ban_lists "${A_CPU_BANS[@]}" )
-	# echo "BAN_LIST_NORMAL='$BAN_LIST_NORMAL'"
-	# echo "BAN_LIST_CPU='$BAN_LIST_CPU'"
-
-	# first init function must be set first for colors etc. Remember, no debugger
-	# stuff works on this function unless you set the debugging flag manually.
-	# Debugging flag -@ [number] will not work until get_parameters runs.
-	
 	### Only continue if depends ok
 	SCRIPT_PATH=$( dirname $0 )
 	SCRIPT_VERSION_NUMBER=$( grep -im 1 'version:' $SCRIPT_PATH/$SCRIPT_NAME | gawk '{print $3}' )
@ -588,6 +572,10 @@ main()
 	# 	print_screen_output "DCSERVER: $DCSERVER"
 	# 	print_screen_output "DCTARGET: $DCTARGET"
 	
+	# first init function must be set first for colors etc. Remember, no debugger
+	# stuff works on this function unless you set the debugging flag manually.
+	# Debugging flag -@ [number] will not work until get_parameters runs.
+	
 	# "$@" passes every parameter separately quoted, "$*" passes all parameters as one quoted parameter.
 	# must be here to allow debugger and other flags to be set.
 	get_parameters "$@"
@ -843,34 +831,6 @@ sanitize_characters()
 	eval $LOGFE
 }

-# Filter boilerplate & buzzwords.
-# args: $1 - quoted: "$@" array of ban terms
-make_ban_lists()
-{
-	eval $LOGFS
-	local ban_list=''
-	# Iterate over $@
-	## note: this is a weird, non-intuitive method, needs some documentation or rewriting
-	## if you declare ban_string it stops working, have to read up on this
-	for ban_string
-	do
-		# echo "term=\"$ban_string\"" # >&2
-		if [[ ${ban_string:0:1} = $'\2' ]];then
-			ban_list="${ban_list}${ban_list+|}${ban_string:1:${#ban_string}-1}"
-		else
-			# Automatically escapes [ ] ( ) . and +
-			ban_list="${ban_list}${ban_list+|}$( echo "$ban_string" | gawk '{
-				gsub(/([\[\]+().])/,"\\\\&")
-				print
-			}' )"
-		fi
-	done
-
-	echo "$ban_list"
-	eval $LOGFS
-}
-# make_ban_lists "${A_CPU_BANS[@]}";exit
-
 # Set the colorscheme
 # args: $1 = <scheme number>|<"none">
 set_color_scheme()
@ -2590,7 +2550,7 @@ get_audio_data()

 	IFS=$'\n'
 	# this first step handles the drivers for cases where the second step fails to find one
-	device_count=$( echo "$Lspci_Data" | egrep -ic '(multimedia audio controller|audio device)' )
+	device_count=$( echo "$Lspci_Data" | grep -iEc '(multimedia audio controller|audio device)' )
 	if [[ $device_count -eq 1 ]] && [[ $B_ASOUND_DEVICE_FILE == 'true' ]];then
 		alsa_driver=$( gawk -F ']: ' '
 		BEGIN {
@ -2618,7 +2578,7 @@ get_audio_data()
 		IGNORECASE=1
 	}
 	/multimedia audio controller|audio device/ {
-		audioCard=gensub(/^[0-9a-f:.]+ [^:]+: (.+)$/,"\\1","g",$0)
+		audioCard=gensub(/^[0-9a-f:\.]+ [^:]+: (.+)$/,"\\1","g",$0)
 		# The doublequotes are necessary because of the pipes in the variable.
 		gsub(/'"$BAN_LIST_NORMAL"'/, "", audioCard)
 		gsub(/,/, " ", audioCard)
@ -2786,7 +2746,7 @@ get_audio_alsa_data()
 			# some alsa strings have the build date in (...)
 			# remove trailing . and remove possible second line if compiled by user
 			$0 !~ /compile/ {
-				gsub( "Driver | [(].*[)]|\.$","",$0 )
+				gsub( /Driver | [(].*[)]|\.$/,"",$0 )
 				gsub(/,/, " ", $0)
 				gsub(/^ +| +$/, "", $0)
 				gsub(/ [ \t]+/, " ", $0)
@ -3301,7 +3261,7 @@ get_distro_data()
 	log_function_data "distro_file: $distro_file"
 	# first test for the legacy antiX distro id file
 	if [[ -e /etc/antiX ]];then
-		distro="$( egrep -oi 'antix.*\.iso' <<< $( remove_erroneous_chars '/etc/antiX' ) | sed 's/\.iso//' )"
+		distro="$( grep -Eoi 'antix.*\.iso' <<< $( remove_erroneous_chars '/etc/antiX' ) | sed 's/\.iso//' )"
 	# this handles case where only one release/version file was found, and it's lsb-release. This would
 	# never apply for ubuntu or debian, which will filter down to the following conditions. In general
 	# if there's a specific distro release file available, that's to be preferred, but this is a good backup.
@ -3929,14 +3889,14 @@ get_hard_drive_data_advanced()

 	## check for all ide type drives, non libata, only do it if hdx is in array
 	## this is now being updated for new /sys type paths, this may handle that ok too
-	if [[ -n $( egrep 'hd[a-z]' <<< ${A_HDD_DATA[@]} ) ]];then
+	if [[ -n $( grep -E 'hd[a-z]' <<< ${A_HDD_DATA[@]} ) ]];then
 		# remember, we're using the last array item to store the total size of disks
 		for (( i=0; i < ${#A_HDD_DATA[@]} - 1; i++ ))
 		do
 			IFS=","
 			a_temp_working=( ${A_HDD_DATA[i]} )
 			IFS="$ORIGINAL_IFS"
-			if [[ -n $( egrep '^hd[a-z]' <<< ${a_temp_working[0]} ) ]];then
+			if [[ -n $( grep -E '^hd[a-z]' <<< ${a_temp_working[0]} ) ]];then
 				if [[ -e /proc/ide/${a_temp_working[0]}/model ]];then
 					a_temp_working[2]="$( remove_erroneous_chars /proc/ide/${a_temp_working[0]}/model )"
 				else
@ -3991,12 +3951,12 @@ get_hard_drive_data_advanced()
 	IFS="$ORIGINAL_IFS"

 	## then we'll loop through that array looking for matches.
-	if [[ -n $( egrep 'sd[a-z]' <<< ${A_HDD_DATA[@]} ) ]];then
+	if [[ -n $( grep -E 'sd[a-z]' <<< ${A_HDD_DATA[@]} ) ]];then
 		# first pack the main ls variable so we don't have to keep using ls /dev...
 		ls_disk_by_id="$( ls -l /dev/disk/by-id )"
 		for (( i=0; i < ${#A_HDD_DATA[@]} - 1; i++ ))
 		do
-			if [[ -n $( egrep '^sd[a-z]' <<< ${A_HDD_DATA[$i]} ) ]];then
+			if [[ -n $( grep -E '^sd[a-z]' <<< ${A_HDD_DATA[$i]} ) ]];then
 				IFS=","
 				a_temp_working=( ${A_HDD_DATA[$i]} )
 				IFS="$ORIGINAL_IFS"
@ -4012,7 +3972,7 @@ get_hard_drive_data_advanced()
 						# discovered disk name AND ends with the correct identifier, sdx
 						# get rid of whitespace for some drive names and ids, and extra data after - in name
 						temp_name=$( tr ' ' '_' <<< ${a_temp_scsi[$j]} | cut -d '-' -f 1 )
-						sd_ls_by_id=$( egrep -m1 ".*$temp_name.*${a_temp_working[0]}$" <<< "$ls_disk_by_id" )
+						sd_ls_by_id=$( grep -Em1 ".*$temp_name.*${a_temp_working[0]}$" <<< "$ls_disk_by_id" )

 						if [[ -n $sd_ls_by_id ]];then
 							temp_name=${a_temp_scsi[$j]}
@ -4029,7 +3989,7 @@ get_hard_drive_data_advanced()
 				if [[ -z $temp_name ]];then
 					temp_name="Name n/a"
 				else 
-					usb_exists=$( egrep -m1 "usb-.*$temp_name.*${a_temp_working[0]}$" <<< "$ls_disk_by_id" )
+					usb_exists=$( grep -Em1 "usb-.*$temp_name.*${a_temp_working[0]}$" <<< "$ls_disk_by_id" )
 					if [[ -n $usb_exists ]];then
 						a_temp_working[3]='USB'
 					fi
@ -4215,7 +4175,7 @@ get_networking_data()
 		IGNORECASE=1
 		counter=0 # required to handle cases of > 1 instance of the same chipset
 	}
-	/^[0-9a-f:.]+ (ethernet|network) (controller|bridge)/ || /^[0-9a-f:.]+ [^:]+: .*(ethernet|network).*$/ {
+	/^[0-9a-f:\.]+ (ethernet|network) (controller|bridge)/ || /^[0-9a-f:\.]+ [^:]+: .*(ethernet|network).*$/ {
 		nic=gensub(/^[0-9a-f:\.]+ [^:]+: (.+)$/,"\\1","g",$0)
 		gsub(/realtek semiconductor/, "Realtek", nic)
 		gsub(/davicom semiconductor/, "Davicom", nic)
@ -5723,12 +5683,12 @@ calculate_multicore_data()
 	eval $LOGFS
 	local string_number=$1 string_data=''

-	if [[ -n $( egrep -i '( mb| kb)' <<< $1 ) ]];then
+	if [[ -n $( grep -Ei '( mb| kb)' <<< $1 ) ]];then
 		string_data=" $( gawk '{print $2}' <<< $1 )" # add a space for output
 		string_number=$( gawk '{print $1}' <<< $1 )
 	fi
 	# handle weird error cases where it's not a number
-	if [[ -n $( egrep '^[0-9\.,]+$' <<< $string_number ) ]];then
+	if [[ -n $( grep -E '^[0-9\.,]+$' <<< $string_number ) ]];then
 		string_number=$( echo $string_number $2 | gawk '{
 			total = $1*$2
 			print total