Bash - Sort the keys of an associative array in-memory in ascending order?

151 Views Asked by At

This Bash function is part of a script that i've recently wrote (following below is a minimal working example of the script with sample data, including an invocation of the function):

#!/bin/bash

declare -A result

stopSchedule='[{"stopId":41571,"lineId":1,"routeId":28,"remainingTime":[3450,8970,13170]},{"stopId":41571,"lineId":2,"routeId":29,"remainingTime":[1410,7950,12750]},{"stopId":41571,"lineId":2,"routeId":30,"remainingTime":[3030,9570,14370]},{"stopId":41571,"lineId":13,"routeId":36,"remainingTime":[3210,7410]},{"stopId":41571,"lineId":6,"routeId":39,"remainingTime":[3090,8790,13110]},{"stopId":41571,"lineId":8,"routeId":43,"remainingTime":[524,4590,9030]},{"stopId":41571,"lineId":8,"routeId":44,"remainingTime":[2190,6150,10590]},{"stopId":41571,"lineId":12,"routeId":52,"remainingTime":[1590]},{"stopId":41571,"lineId":10,"routeId":54,"remainingTime":[2970]}]'
stopsData='[{"id":41571,"areaId":1,"number":"44","name":"МБАЛ Добрич","translations":{},"lat":43.56184005737305,"lon":27.818910598754883,"note":""}]'
routesData='[{"id":44,"lineId":8,"direction":"Практикер-Депо","name":"Практикер-Депо","begin":1682629200,"end":0,"length":9876,"stopIds":[41674,41822,41630,41747,41750,41806,41681,41718,41745,41552,41576,41571,41731,41589,41627,41802,41643,41590,41625,41600,41694,41604,41676,41579,41654,41641,41595],"stopOffsets":[0,120,180,240,300,360,480,540,600,660,720,780,840,900,960,1020,1080,1140,1260,1320,1380,1440,1500,1560,1620,1680,1740]},{"id":43,"lineId":8,"direction":"Депо-Практикер","name":"Депо-Практикер","begin":1682629200,"end":0,"length":10076,"stopIds":[41595,41641,41655,41578,41675,41603,41695,41624,41591,41642,41803,41626,41588,41602,41571,41658,41746,41717,41682,41807,41749,41748,41724,41631,41629,41674],"stopOffsets":[0,60,120,180,240,300,360,420,480,540,600,660,720,780,900,960,1020,1080,1140,1260,1320,1380,1440,1500,1560,1680]},{"id":39,"lineId":6,"direction":"Балик-Дружба-Център","name":"Балик-Дружба-Център","begin":1682629200,"end":0,"length":10237,"stopIds":[41688,41689,41710,41559,41686,41791,41567,41703,41636,41743,41612,41613,41617,41529,41615,41548,41805,41681,41665,41773,41576,41571,41658,41623,41764,41627,41802],"stopOffsets":[0,60,120,180,300,360,480,540,660,720,840,900,960,1080,1140,1200,1260,1320,1380,1440,1500,1560,1620,1740,1800,1860,1920]},{"id":29,"lineId":2,"direction":"Балик-Старт","name":"Балик-Старт","begin":1682542801,"end":0,"length":11100,"stopIds":[41688,41689,41710,41786,41559,41686,41693,41634,41565,41632,41795,41793,41536,41652,41821,41619,41681,41718,41745,41552,41576,41571,41731,41589,41627,41767,41734,41684,41692],"stopOffsets":[0,60,120,180,300,360,480,540,600,720,780,840,900,960,1020,1140,1260,1380,1440,1500,1620,1680,1740,1800,1860,1980,2040,2100,2160]},{"id":28,"lineId":1,"direction":"АПК-Строител","name":"АПК-Строител","begin":1682542801,"end":0,"length":9655,"stopIds":[41687,41545,41719,41580,41645,41647,41571,41658,41746,41717,41682,41608,41701,41811,41582,41728,41649,41594,41787,41711,41568,41754,41792,41910,41693],"stopOffsets":[0,60,180,300,360,420,540,600,660,780,840,900,960,1020,1080,1140,1200,1260,1320,1380,1440,1560,1620,1681,1740]},{"id":30,"lineId":2,"direction":"Старт-Балик","name":"Старт-Балик","begin":1682542801,"end":0,"length":11140,"stopIds":[41692,41683,41733,41768,41626,41588,41602,41571,41658,41746,41717,41682,41620,41666,41653,41537,41794,41796,41633,41912,41910,41693,41685,41787,41711,41688],"stopOffsets":[0,60,120,180,300,360,420,540,600,660,720,780,900,960,1080,1140,1200,1320,1380,1500,1560,1620,1740,1860,1920,1980]},{"id":52,"lineId":12,"direction":"Липите-Център","name":"Липите-Център","begin":1682629200,"end":0,"length":7705,"stopIds":[41669,41677,41679,41722,41551,41660,41799,41698,41818,41671,41605,41781,41576,41571,41658,41746,41717,41682,41608,41701,41811,41716],"stopOffsets":[0,120,180,300,360,420,540,600,660,720,840,900,960,1020,1080,1140,1260,1320,1380,1440,1500,1620]},{"id":34,"lineId":4,"direction":"Гробищен парк-Депо","name":"Гробищен парк-Депо","begin":1682629200,"end":0,"length":14352,"stopIds":[41592,41663,41809,41574,41724,41631,41629,41674,41628,41541,41616,41528,41618,41564,41594,41787,41711,41568,41754,41566,41632,41695,41789,41763,41570,41639,41534,41713,41759,41725,41585,41579,41654,41641,41595],"stopOffsets":[0,60,120,180,300,360,420,540,600,660,780,840,960,1080,1200,1320,1380,1440,1560,1680,1800,1860,1920,2040,2100,2160,2220,2280,2340,2400,2460,2520,2640,2700,2760]},{"id":54,"lineId":10,"direction":"Добротица-Рилци","name":"Добротица-Рилци","begin":1691960400,"end":0,"length":13838,"stopIds":[41724,41631,41629,41541,41805,41620,41666,41820,41653,41537,41753,41700,41681,41665,41773,41576,41571,41658,41623,41815,41709,41803,41829,41752,41766,41812,41816,41729,41690],"stopOffsets":[0,60,120,180,240,300,360,370,480,540,600,720,840,900,960,1020,1080,1140,1200,1260,1320,1440,1500,1620,1740,1860,1920,2040,2100]},{"id":36,"lineId":13,"direction":"Гробищен парк-Депо","name":"Гробищен парк-Депо","begin":1682629200,"end":0,"length":9814,"stopIds":[41592,41663,41809,41574,41747,41750,41806,41681,41665,41773,41571,41731,41589,41627,41715,41547,41570,41639,41534,41713,41759,41725,41585,41579,41654,41595],"stopOffsets":[0,60,120,180,300,360,480,600,720,780,900,960,1020,1080,1140,1200,1260,1320,1380,1440,1500,1560,1620,1680,1740,1800]}]'

function stopNameAndNumById {
    echo $stopsData | jq -r --arg id "$1" '.[] | select(.id == ($id | tonumber)) | "\(.number),\(.name)"'
}

function collectLinesDesc {
    if [ -z "$linesDesc" ]; then
        linesDesc='[{"id":13,"kind":"BUS","number":"4а","name":"Линия 4A","nightly":false,"routeIds":[35,36],"type":"URBAN","carrier":"X"}]'
    fi
}

function linesFromStopId {
    IFS=', ' read -ra id_arr <<< "$1"
    i=true

    for id in "${id_arr[@]}"; do
        stopNumName=$(stopNameAndNumById "$id")
        for row in $(echo "$stopSchedule" | jq -c -r '.[] | {lineId, remainingTime, routeId}'); do
            lineId=$(echo "$row" | jq -r '.lineId')
            routeId=$(echo "$row" | jq -r '.routeId')
            remainingTime=$(echo "$row" | jq -r '.remainingTime[0]')
            routeName=$(echo "$routesData" | jq -r '.[] | select(.id == '$routeId') | .name')
            remainingTimeInMinutes=$((remainingTime / 60))
            key=$lineId
            while [[ -n "${result[$key]}" ]]; do
                key="${key}_"
            done
            result["$key"]="$remainingTimeInMinutes,$routeName"
        done
        [ $i = true ] && i=false || echo
        echo -e "Спирка $(cut -d',' -f2 <<< $stopNumName) ($(cut -d',' -f1 <<< $stopNumName))\n"
        for key in "${!result[@]}"; do
            newKey="$key"
            if [[ $key == *_ ]]; then
                newKey="${key%_}"
            fi
            IFS=',' read -ra values <<< "${result[$key]}"
            if [ $newKey -gt "12" ]; then
                collectLinesDesc
                newKey=$(echo $linesDesc | jq -r --arg id "$newKey" '.[] | select(.id == ($id | tonumber)) | .number' | awk '{ print toupper($0) }')
            fi
            echo "Линия $newKey (${values[1]}) - ${values[0]} минути"
        done
        result=()
    done
}

linesFromStopId "41571"

I'm looking for an elegant way to sort the keys of the $result associative array in an ascending order in memory, not when i'm printing the data to the output.

Before the while [[ -n "${result[$key]}" ]]; do loop, it is guaranteed that the keys of the array contain only numbers (one or two digits long).

collectLinesDesc collects the description of the bus lines from a RESTful API if we haven't already collected them before.

The current output of the minimal working example will be:

Спирка МБАЛ Добрич (44)

Линия 8 (Практикер-Депо) - 0 м.
Линия 8 (Депо-Практикер) - 0 м.
Линия 6 (Балик-Дружба-Център) - 0 м.
Линия 2 (Балик-Старт) - 0 м.
Линия 1 (АПК-Строител) - 0 м.
Линия 2 (Старт-Балик) - 0 м.
Линия 12 (Липите-Център) - 0 м.
Линия 4А (Гробищен парк-Депо) - 0 м.
Линия 10 (Добротица-Рилци) - 0 м.

Expected output (the number after "Линия" sorted in ascending order):

Спирка МБАЛ Добрич (44)

Линия 1 (Практикер-Депо) - 0 м.
Линия 2 (Депо-Практикер) - 0 м.
Линия 2 (Балик-Дружба-Център) - 0 м.
Линия 6 (Балик-Старт) - 0 м.
Линия 8 (АПК-Строител) - 0 м.
Линия 8 (Старт-Балик) - 0 м.
Линия 10 (Липите-Център) - 0 м.
Линия 12 (Гробищен парк-Депо) - 0 м.
Линия 4A (Добротица-Рилци) - 0 м.

("4A" here is a display value, in the array it will be a number bigger than 12, so it will always be last when sorted in ascending order, so the logic remains consistent with the display logic)

One way i can think of is to make jq output the data from the JSON ordered by lineId, and then use a second indexed array to keep the order of the entries, then using that indexed array as a key for the associative aray $result when i iterate through it in the loop that finally outputs the data, but is there a nicer / shorter / simpler way to achieve the same in Bash?

2

There are 2 best solutions below

1
jhnc On BEST ANSWER

Since the keys are defined to be small integers, you could trivially keep result (partially) ordered by using -a instead of -A, since indexed arrays are traversed by increasing numerical index.

Store the key as a scaled-up value (eg. multiply by 1000). Whenever there is a collision, instead of appending _, add 1.

When using the keys later, scale back down (eg. divide by 1000). Since bash uses integer arithmetic, this will drop any fractional part, giving the original pre-collision key.

For example:

# declare -A result
declare -a result
# ...
            # key=$lineId
            (( key = lineId*1000 ))
            while [[ -n "${result[$key]}" ]]; do
                # key="${key}_"
                (( key++ ))
            done
# ...
        for key in "${!result[@]}"; do
            # newKey="$key"
            # if [[ $key == *_ ]]; then
            #    newKey="${key%_}"
            # fi
            (( newKey = key/1000 ))
# ...

but this will produce different output from what was requested:

Спирка МБАЛ Добрич (44)

Линия 1 (АПК-Строител) - 57 минути
Линия 2 (Балик-Старт) - 23 минути
Линия 2 (Старт-Балик) - 50 минути
Линия 6 (Балик-Дружба-Център) - 51 минути
Линия 8 (Депо-Практикер) - 8 минути
Линия 8 (Практикер-Депо) - 36 минути
Линия 10 (Добротица-Рилци) - 49 минути
Линия 12 (Липите-Център) - 26 минути
Линия 4А (Гробищен парк-Депо) - 53 минути

It is very unclear why you expect that sorting the keys will change the values stored in the array.


The original code looks as though it may be buggy in the case where there is more than one collision (eg. consider what happens if you replace 6 with 8 in the data).

0
jqurious On

As your real JSON data comes from several curl calls, it may require some restructuring, but perhaps this is still a useful example:

As a potential alternative to running individually on each id and using select(.id == ...) you could combine the records using INDEX() and JOIN()

stopSchedule='...'
stopsData='...'
routesData='...'
linesDesc='...'

jq -n -r \
\
  --argjson stopsData    "$stopsData"    \
  --argjson stopSchedule "$stopSchedule" \
  --argjson routesData   "$routesData"   \
  --argjson linesDesc    "$linesDesc"    \
'
[
   # extract lines.number and combine records based on lines.id == stop.lineId
   # create result with { routeId: {...} } shape in order to JOIN with routesData
   JOIN( 
      INDEX($linesDesc[] | {id, number}; .id);
      $stopSchedule[] | {lineId, routeId, remainingTime};
      .lineId | tostring; 
      . as [$stop, $line] | {($stop.routeId | tostring): add} 
   ) as $stopSchedule
   | 
   # extract route.name and combine records based on stop.id == route.id
   JOIN( 
      $stopSchedule; 
      $routesData[] | {id, name}; 
      .id | tostring; 
      add
   )
   | .remainingTime = ((.remainingTime[0] // 0) / 60 | floor)
]
| sort_by(.lineId)[]
| select(.lineId) # skip {"lineId": null}
| "Линия \(if .lineId > 12 then .number else .lineId end) (\(.name)) - \(.remainingTime) минути"
'

Линия 1 (АПК-Строител) - 57 минути
Линия 2 (Балик-Старт) - 23 минути
Линия 2 (Старт-Балик) - 50 минути
Линия 6 (Балик-Дружба-Център) - 51 минути
Линия 8 (Депо-Практикер) - 8 минути
Линия 8 (Практикер-Депо) - 36 минути
Линия 10 (Добротица-Рилци) - 49 минути
Линия 12 (Липите-Център) - 26 минути
Линия 4а (Гробищен парк-Депо) - 53 минути

The only missing piece being toupper (unfortunately, unicode upcase did not make it into 1.7)

You can however output the id/number, name, time and print the final string with shell/awk.