Flatten nested array but also add information from another array in python

Hi, I am currently doing machine learning and have run into something tricky. I have a dictionary that contains two arrays, "data" and "name". Both contain the same number of sub-arrays, and each element of "data" is related to the element of "name" at the same level and position. For example, the data array [-12.746249259898983, -40.618078412505398, -53.765980530841738, 5.2261728305123398, 0.12162578441442624, -5.7948531230422988, -449155.5052490317, 0.91963697398417421] as a whole has the name 100, and the value -12.746249259898983 inside that array has the name '100.0'.

However, a name can be a sibling array or a nested array.

{'data': [[ [-12.746249259898983, -40.618078412505398, -53.765980530841738, 5.2261728305123398, 0.12162578441442624, -5.7948531230422988, -449155.5052490317, 0.91963697398417421], [-98.037517854387147, -221.057540054155, -159.4518899022695, 2.8761226353857214, -5.3555144738112865, -8.4617336886131636, -1078807.4268864163, -13.13441127151575], [20.903581634463947, 25.329222910065528, 5.412150882014295, 9.8334972052776468, 5.0344964646349721, -4.9321628192214595, 465839.20729012048, 13.101100255062818], [6906900.092440652, 41574768.75710336, 10927415.550909607, 11310.66577299293, 8798.704300502737, 8799.682832152064, 183845205120901.88, 112667.8001209582], [23668.070022021395, 50044.215525581945, 31776.017910004855, 1187.8286828919734, 1004.3222306303834, 1097.6394464618995, 144191707.36233276, 3204.3354707366584]], [[-12.746249259898983, -40.618078412505398, -53.765980530841738, 5.2261728305123398, 0.12162578441442624, -5.7948531230422988, -449155.5052490317, 0.91963697398417421], [-98.037517854387147, -221.057540054155, -159.4518899022695, 2.8761226353857214, -5.3555144738112865, -8.4617336886131636, -1078807.4268864163, -13.13441127151575], [20.903581634463947, 25.329222910065528, 5.412150882014295, 9.8334972052776468, 5.0344964646349721, -4.9321628192214595, 465839.20729012048, 13.101100255062818], [6906900.092440652, 41574768.75710336, 10927415.550909607, 11310.66577299293, 8798.704300502737, 8799.682832152064, 183845205120901.88, 112667.8001209582], [23668.070022021395, 50044.215525581945, 31776.017910004855, 1187.8286828919734, 1004.3222306303834, 1097.6394464618995, 144191707.36233276, 3204.3354707366584]], [[105488.30345760827], [3289.7903599842557], [1084894.676814588], [14144.972809638024], [-0.28007907692942713, 0.22442925752465423, 0.53764244860950272], [-0.17191558990475442, -0.79049974125170352, 0.50351854155808451], [0.82458612961411526, 0.76972732632638607, 0.85167351947413183], [1.0, 1.0, 1.0]], [[523.30228588437444, 45.819830845333598, 
28.816263055785519, -13.399823220658771, -4.3270838526008539, 6.3733684200628371, 8595795.3597348519, 8.384858849273007], [475.31250457055427, -228.32291772427124, -176.40773800372068, -18.093080976490384, -10.369967809128632, 4.2210565912106244, 7098802.0557304611, -7.1771997695309269], [588.25628456928746, 294.97718389914189, 262.91102656456047, -10.427939493412241, 1.4416568032097778, 8.5693307123828362, 10382020.11504475, 25.39147629554094], [48743197.97266989, 30241091.100284778, 18077525.93554477, 34815.577041886056, 13488.38255077284, 9285.35861896049, 1.33034499502354e+16, 137928.77372005512], [88438.08631445921, 56149.17419161105, 43172.45645293931, 2264.570124291333, 1314.3600405678308, 1119.5488398712207, 1452689415.79519, 3947.582767320521]], [[523.30228588437444, 45.819830845333598, 28.816263055785519, -13.399823220658771, -4.3270838526008539, 6.3733684200628371, 8595795.3597348519, 8.384858849273007], [475.31250457055427, -228.32291772427124, -176.40773800372068, -18.093080976490384, -10.369967809128632, 4.2210565912106244, 7098802.0557304611, -7.1771997695309269], [588.25628456928746, 294.97718389914189, 262.91102656456047, -10.427939493412241, 1.4416568032097778, 8.5693307123828362, 10382020.11504475, 25.39147629554094], [48743197.97266989, 30241091.100284778, 18077525.93554477, 34815.577041886056, 13488.38255077284, 9285.35861896049, 1.33034499502354e+16, 137928.77372005512], [88438.08631445921, 56149.17419161105, 43172.45645293931, 2264.570124291333, 1314.3600405678308, 1119.5488398712207, 1452689415.79519, 3947.582767320521]], [[187759.71695900976], [4698.479004730381], [1027854.7076158928], [23680.601277936279], [-0.15439507445075032, -0.075920544072209092, 0.66503179584326411], [0.46923791478750121, -0.69581655384848085, -0.72748336248359702], [0.21092970277607359, 0.14987815855097059, 0.81570800991816339], [1.0, 1.0, 1.0]], [[13.396472405737706, -18.359344127602043, 36.826432688017405, -5.4414333610698273, -10.694878175101108, 
7.9448139442889429, 10816437.307411144, 0.19533722684887966], [-15.189934136155252, -200.13563785186551, -109.36104765476506, -11.7199280963678, -17.858474582674631, 6.8660269597012578, 1044415.9081568928, -1.3455015074983672], [72.972793023164726, 181.79005098185144, 199.11746464797852, -4.9326473220315465, -5.0927085243861612, 14.603766227834551, 19440694.34613679, 1.7339256547371409], [1384309.4521168934, 8347935.341004645, 7917333.307240587, 29378.49283018787, 32011.489849631027, 36975.920226450995, 3.3465760911327116e+16, 911.4959262418305], [11296.673361701862, 32414.28539427253, 29892.827418169887, 1857.859505569651, 2026.9222424051377, 2193.920509546758, 1961706423.8618736, 309.5250420877879]], [[13.396472405737706, -18.359344127602043, 36.826432688017405, -5.4414333610698273, -10.694878175101108, 7.9448139442889429, 10816437.307411144, 0.19533722684887966], [-15.189934136155252, -200.13563785186551, -109.36104765476506, -11.7199280963678, -17.858474582674631, 6.8660269597012578, 1044415.9081568928, -1.3455015074983672], [72.972793023164726, 181.79005098185144, 199.11746464797852, -4.9326473220315465, -5.0927085243861612, 14.603766227834551, 19440694.34613679, 1.7339256547371409], [1384309.4521168934, 8347935.341004645, 7917333.307240587, 29378.49283018787, 32011.489849631027, 36975.920226450995, 3.3465760911327116e+16, 911.4959262418305], [11296.673361701862, 32414.28539427253, 29892.827418169887, 1857.859505569651, 2026.9222424051377, 2193.920509546758, 1961706423.8618736, 309.5250420877879]], [[73603.78617414428], [6078.7022575215515], [625733.61247557076], [45672.991571875093], [0.043448760802826739, 0.23278517949941815, -0.4412484230587122], [0.73571039835259899, -0.97150579562327632, -0.85102560336644162], [0.43273760788012128, 0.59834831245116515, 0.76048753082083709], [1.0, 1.0, 1.0]], [[-4312.4566991228266, 111.37588860973005, -369.27441630396561, 1.0221741887428542, 10.264001072191407, -0.64794873494824146, -26535.315429097467, 
3733.3894352435532], [-4391.6993203248185, 2.1930377207860374, -556.18155858830767, -2.8465168776531042, 6.8275893393989735, -2.992456229023948, -134301.2497497506, 3085.6901898537008], [-4265.6954381966243, 490.57527638444799, -106.53884094262287, 2.6903234802233951, 14.899879413776139, 1.2542987455939745, 124282.28583445404, 4312.3584935051322], [3154842424.3400664, 66787778.51899314, 37678234.97132428, 1886.122427403741, 27432.6986991385, 1552.4163983149103, 9721961586366.314, 2438338620.274526], [728805.1821517571, 88155.59598567848, 68098.54374026474, 500.1103528606394, 1980.9257474491533, 442.23338752473614, 30550976.541662402, 630942.8145561604]], [[-4312.4566991228266, 111.37588860973005, -369.27441630396561, 1.0221741887428542, 10.264001072191407, -0.64794873494824146, -26535.315429097467, 3733.3894352435532], [-4391.6993203248185, 2.1930377207860374, -556.18155858830767, -2.8465168776531042, 6.8275893393989735, -2.992456229023948, -134301.2497497506, 3085.6901898537008], [-4265.6954381966243, 490.57527638444799, -106.53884094262287, 2.6903234802233951, 14.899879413776139, 1.2542987455939745, 124282.28583445404, 4312.3584935051322], [3154842424.3400664, 66787778.51899314, 37678234.97132428, 1886.122427403741, 27432.6986991385, 1552.4163983149103, 9721961586366.314, 2438338620.274526], [728805.1821517571, 88155.59598567848, 68098.54374026474, 500.1103528606394, 1980.9257474491533, 442.23338752473614, 30550976.541662402, 630942.8145561604]], [[885059.3218777011], [2923.2694878345296], [2236604.1192022543], [8808.54792655048], [-0.67448622916323409, -0.50158101433414648, 0.59882229555795874], [-0.78104865531713752, -0.76429278912704701, 0.63516453144318363], [0.20462205303333128, 0.84578383059323625, 0.59233056719450861], [1.0, 1.0, 1.0]], [[-259.72665899990471, -83.479321394167755, -121.15460261636761, -9.5566399167895906, -10.751374026150252, 6.9216241967193373, 14885579.317280933, -21.445028515600601], [-443.23038559240399, -180.2214836399863, 
-177.88859043152414, -17.157662401019614, -16.084610849457864, 2.4976391930266151, 10238968.298370993, -44.428320700622876], [-303.57098842656001, 48.893029546726382, -69.225922326687694, -3.4513266189454357, -7.9243380822281777, 11.288531677853044, 19912765.253164865, 2.6916854500962191], [41735450.1518125, 24390799.584619, 9017310.78529916, 28630.03604412623, 27367.43896473854, 17107.884579392492, 4.242364584898001e+16, 253955.56763607], [75924.07800808054, 43290.42097424058, 29303.03439665969, 1778.5410136020398, 1988.7244606604343, 1372.5600617945775, 2515662904.620479, 4776.4285037481795]], [[-259.72665899990471, -83.479321394167755, -121.15460261636761, -9.5566399167895906, -10.751374026150252, 6.9216241967193373, 14885579.317280933, -21.445028515600601], [-443.23038559240399, -180.2214836399863, -177.88859043152414, -17.157662401019614, -16.084610849457864, 2.4976391930266151, 10238968.298370993, -44.428320700622876], [-303.57098842656001, 48.893029546726382, -69.225922326687694, -3.4513266189454357, -7.9243380822281777, 11.288531677853044, 19912765.253164865, 2.6916854500962191], [41735450.1518125, 24390799.584619, 9017310.78529916, 28630.03604412623, 27367.43896473854, 17107.884579392492, 4.242364584898001e+16, 253955.56763607], [75924.07800808054, 43290.42097424058, 29303.03439665969, 1778.5410136020398, 1988.7244606604343, 1372.5600617945775, 2515662904.620479, 4776.4285037481795]], [[148517.53337898097], [5139.82553605705], [1242960.9144121602], [27678.070645658128], [-0.072211399963483205, -0.01330100771527422, 0.50872724028242133], [0.12899184324668519, -0.83705081189441499, -0.30244329509440016], [0.8127044117719453, 0.76986342841470423, 0.67385354549078191], [1.0, 1.0, 1.0]]],
 'name': [[100, 101, 102, 103, 104, 105], [106, 107, 108], [109, 110, 111, 112, 113, 114, 115, 116], [200, 201, 202, 203, 204, 205], [206, 207, 208], [209, 210, 211, 212, 213, 214, 215, 216], [300, 301, 302, 303, 304, 305], [306, 307, 308], [309, 310, 311, 312, 313, 314, 315, 316], [400, 401, 402, 403, 404, 405], [406, 407, 408], [409, 410, 411, 412, 413, 414, 415, 416], [500, 501, 502, 503, 504, 505], [506, 507, 508], [509, 510, 511, 512, 513, 514, 515, 516]]}

      

Now I want to flatten the data so that all of it is on the first level; however, once I do that, the names no longer match the data.

More generally (note that the data itself can be nested more deeply than the names, but the names never are), suppose I have: {'data':[[[1],[2,3,[4]]],[[5,6],[7,8,9]]],'name':[[100,101],[202,203]]}

Here 100 is the name of [1], 101 is the name of [2,3,[4]], 202 is the name of [5,6], and 203 is the name of [7,8,9].

more clearly:

`{'data':[[ [1],[2,3,[4]] ],[ [5,6],[7,8,9]]]
             |      |           |      |
'name':  [[ 100,   101    ],[  202,   203  ]]

      

As soon as I flatten the data, it becomes [1,2,3,4,5,6,7,8,9].

The corresponding array of names is still [100,101,202,203], which is a mismatch. I want them to match, so that when I flatten the nested array I also get a name for every element inside it, i.e. get [1,2,3,4,5,6,7,8,9]

and [100.0,101.0,101.1,101.2,202.0,202.1,203.0,203.1,203.2]

since the elements of [2,3,[4]] are all named 101: 101.0 means the first element in that array, which is 2, and 101.2 means the third element in the array. (I don't care about representing deeper nesting: [4] is still 101.2, not 101.20, and [[4]] is also 101.2, not 101.200. If you are a magician, I would love to have that cooler naming style, but I think the question is hard enough already.) That way the array of names is correctly aligned with the data too ...

It's kind of a brainstorming session .. Thanks for any help ..

### I rewrote the question: yesterday, with the help of @schlezzz15, I found some errors in my original data (many thanks). Unfortunately the data is so big that the errors in it were hard to find; very sorry.
+3


source to share


2 answers


An interesting problem.

Solution with zip, itertools.chain, itertools.cycle and compiler.ast.flatten



try:
    # Python 2 only: the ``compiler`` package is deprecated and no longer
    # exists in Python 3.
    from compiler.ast import flatten
except ImportError:
    def flatten(seq):
        """Return a flat list of the leaves of arbitrarily nested lists/tuples."""
        flat = []
        for element in seq:
            if isinstance(element, (list, tuple)):
                flat.extend(flatten(element))
            else:
                flat.append(element)
        return flat
from itertools import chain, cycle

d = {'data':[[ [1],[2,3,[4]] ],[ [5,6],[7,8,9]]],
     'name':  [[ 100,   101    ],[  202,   203  ]]}

# Strip the outermost nesting level on both sides so that names and data line
# up one-to-one: groups[i] is the (possibly nested) data named by name[i].
name = list(chain(*d["name"]))
groups = list(chain(*d["data"]))
# Fully flatten each named group; the grouping per name is kept.
data = [flatten(group) for group in groups]

print(name)
# >>> [100, 101, 202, 203]
print(data)
# >>> [[1], [2, 3, 4], [5, 6], [7, 8, 9]]

# list(...) keeps the printed output identical on Python 3, where zip is lazy.
aligned = list(zip(name, data))
print(aligned)
# >>> [(100, [1]), (101, [2, 3, 4]), (202, [5, 6]), (203, [7, 8, 9])]

# cycle([n]) repeats the group's name for as long as zip has values to pair.
result = [list(zip(cycle([n]), da)) for n, da in aligned]
print(result)
# >>> [[(100, 1)], [(101, 2), (101, 3), (101, 4)], [(202, 5), (202, 6)], [(203, 7), (203, 8), (203, 9)]]

flattened_result = list(chain(*result))
print(flattened_result)
# >>> [(100, 1), (101, 2), (101, 3), (101, 4), (202, 5), (202, 6), (203, 7), (203, 8), (203, 9)]


# One label per flattened value: "<name>.<position>", where position is the
# index of the value's top-level element inside its named group. Values nested
# deeper share the position of the element that contains them, so the 4 in
# [2, 3, [4]] is labelled 101.2. The label depends only on the position, never
# on the data value itself.
string_result = [
    str(n) + "." + str(position)
    for n, group in zip(name, groups)
    for position, element in enumerate(group)
    # flatten([element]) yields one entry per leaf, whether element is a
    # scalar or a nested list.
    for _ in flatten([element])
]
print(string_result)
# >>> ['100.0', '101.0', '101.1', '101.2', '202.0', '202.1', '203.0', '203.1', '203.2']


# I do not recommend the below version, only on OP request.
d = {'data':[[ [1],[2,3,[4]] ],[ [5,6],[7,8,9]]],
     'name':  [[ 100,   101    ],[  202,   203  ]]}
all_in_one = [str(n) + "." + str(i) for n, group in zip(chain(*d["name"]), chain(*d["data"])) for i, element in enumerate(group) for _ in flatten([element])]
print(all_in_one == string_result)
# >>> True

      

I'm not sure what output format you would like, so I have included all intermediate results. It will be easy for you to get the one you want. If you provide the desired output, I'll correct my solution.

+3


source


It might work. I have not tested it.



try:
    # Python 2 only: the ``compiler`` package is deprecated and no longer
    # exists in Python 3.
    from compiler.ast import flatten
except ImportError:
    def flatten(seq):
        """Return a flat list of the leaves of arbitrarily nested lists/tuples."""
        flat = []
        for element in seq:
            if isinstance(element, (list, tuple)):
                flat.extend(flatten(element))
            else:
                flat.append(element)
        return flat


def main(dict_=None):
    """Map every flattened data value to the name of its top-level item.

    Args:
        dict_: mapping with a 'data' list (scalars and/or arbitrarily nested
            lists) and a 'name' list holding one name per top-level item of
            'data'. Defaults to a small built-in example.

    Returns:
        A dict ``{value: name}``, which is also printed. Because the result is
        keyed by data value, a value that occurs more than once keeps only the
        name assigned last.

    Raises:
        IndexError: if 'name' has fewer entries than 'data' has items.
    """
    if dict_ is None:
        dict_ = {'data': [1, [2, 3, [4]]], 'name': [1000, 1001]}
    data = dict_.get('data')
    name = dict_.get('name')
    flatten_dict = {}

    for i, item in enumerate(data):
        # Wrapping the item lets flatten() treat scalars of any type (int,
        # float, ...) and nested lists uniformly, so no value is skipped.
        for leaf in flatten([item]):
            flatten_dict[leaf] = name[i]

    print(flatten_dict)
    return flatten_dict


if __name__ == "__main__":
    main()

      

0


source







All Articles