>

3개의 열이 있는 데이터 프레임이 있고, 그중 튜플 목록으로 이루어진 두 번째 열만 반복 처리하고 싶습니다. 해당 목록에 있는 각 튜플의 마지막 요소만 남기고 싶습니다.

fuzzywuzzy를 사용하여 텍스트 일치 스크립트를 만들었습니다.

import pandas as pd
from fuzzywuzzy import process, fuzz

# Fuzzy-match every address against the full address list and write one row
# per (customer id, address) pair to match_score.csv.

# Widen pandas' console display so the printed frame is easier to read.
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', 10)

data = pd.read_csv(r"address_details.csv", skiprows=0)
# Named cust_ids rather than `id` so the builtin id() is not shadowed.
cust_ids = data['COD_CUST_ID'].values.tolist()
address = data['ADDRESS'].values.tolist()

# The matches for an address do not depend on the customer id, so compute
# them once per address instead of once per (customer id, address) pair.
matches = [
    process.extractBests(addr, address, limit=len(address), score_cutoff=40)
    for addr in address
]

# Same rows, in the same order, as the original nested index loops:
# every customer id is paired with every address.
dict_list = [
    {"Cust_Id": cust_id, "Match Ratio": score, "Search String": addr}
    for cust_id in cust_ids
    for addr, score in zip(address, matches)
]
df = pd.DataFrame(dict_list)
print(df)
# to_csv returns None when given a path, so the result is not kept.
df.to_csv("match_score.csv", sep=',', index=None)

원래 CSV 데이터

Cust_Id Match Ratio Search String
21527575    [('H.NO.407,ROOM NO.310. 3RD FLOOR MAQBOOL APARTMENT APARTMENT OPP, RABIYA MASJID MANGAL BAZAR SLAP KOT THANE MAHARASHTRA 421302', 100)]    H.NO.407,ROOM NO.310. 3RD FLOOR MAQBOOL APARTMENT APARTMENT OPP, RABIYA MASJID MANGAL BAZAR SLAP KOT THANE MAHARASHTRA 421302
21527575    [('H.NO.407, ROOM NO.310, 3RD FLOOR MAQBOOL APARTMENT OPP,RABIYA MASJID MANGAL BAZAR SLAP KOTER GATE THANE MAHARASHTRA 421302', 100)]   H.NO.407, ROOM NO.310, 3RD FLOOR MAQBOOL APARTMENT OPP,RABIYA MASJID MANGAL BAZAR SLAP KOTER GATE THANE MAHARASHTRA 421302
21527575    [('FLAT NO.103, 1ST FLOOR B-WING,CTS NO.388,KAAP TALAVO  ZAITOON PURA BEHIND KOTER GATE MASJID BHIWANDI THANE MAHARASHTRA 421302', 100)]    FLAT NO.103, 1ST FLOOR B-WING,CTS NO.388,KAAP TALAVO  ZAITOON PURA BEHIND KOTER GATE MASJID BHIWANDI THANE MAHARASHTRA 421302
21527575    [('VPO. SAHWA   CHURU RAJASTHAN 331302', 100)]  VPO. SAHWA   CHURU RAJASTHAN 331302
21527575    [('WARD NO.-3 NATT ROAD TALWANDI SABO BATHINDA  BATHINDA PUNJAB 151302', 100)]  WARD NO.-3 NATT ROAD TALWANDI SABO BATHINDA  BATHINDA PUNJAB 151302
21527575    [('H.NO.-137 RAMA ROAD TALWANDI SABO BATHINDA  BATHINDA PUNJAB 151302', 100)]   H.NO.-137 RAMA ROAD TALWANDI SABO BATHINDA  BATHINDA PUNJAB 151302
21527575    [('WARD NO 25 GHADSISAR ROAD BASANT KUNJ KE SAMNE HANUMAN MANDIR KE PASS CHOUDHARY COLONY GANGASHAR BIKANER RAJASTHAN 334001', 100)]    WARD NO 25 GHADSISAR ROAD BASANT KUNJ KE SAMNE HANUMAN MANDIR KE PASS CHOUDHARY COLONY GANGASHAR BIKANER RAJASTHAN 334001
21527575    [('Karchha Kalan   UDAIPUR RAJASTHAN 313803', 100)] Karchha Kalan   UDAIPUR RAJASTHAN 313803
21527575    [('VAGPUR KARCHCHA KALAN   UDAIPUR RAJASTHAN 313803', 100)] VAGPUR KARCHCHA KALAN   UDAIPUR RAJASTHAN 313803
21527575    [('VILLAGE GORIYAN TEHSIL UDAIPURWATI DIST JHUNJHUNU  JHUJHUNU RAJASTHAN 333307', 100)] VILLAGE GORIYAN TEHSIL UDAIPURWATI DIST JHUNJHUNU  JHUJHUNU RAJASTHAN 333307

원하는 결과 :

Cust_Id Match Ratio Search String
21527575    100 H.NO.407,ROOM NO.310. 3RD FLOOR MAQBOOL APARTMENT APARTMENT OPP, RABIYA MASJID MANGAL BAZAR SLAP KOT THANE MAHARASHTRA 421302
21527575    100 H.NO.407, ROOM NO.310, 3RD FLOOR MAQBOOL APARTMENT OPP,RABIYA MASJID MANGAL BAZAR SLAP KOTER GATE THANE MAHARASHTRA 421302
21527575    100 FLAT NO.103, 1ST FLOOR B-WING,CTS NO.388,KAAP TALAVO  ZAITOON PURA BEHIND KOTER GATE MASJID BHIWANDI THANE MAHARASHTRA 421302
21527575    100 VPO. SAHWA   CHURU RAJASTHAN 331302
21527575    100 WARD NO.-3 NATT ROAD TALWANDI SABO BATHINDA  BATHINDA PUNJAB 151302
21527575    100 H.NO.-137 RAMA ROAD TALWANDI SABO BATHINDA  BATHINDA PUNJAB 151302
21527575    100 WARD NO 25 GHADSISAR ROAD BASANT KUNJ KE SAMNE HANUMAN MANDIR KE PASS CHOUDHARY COLONY GANGASHAR BIKANER RAJASTHAN 334001
21527575    100 Karchha Kalan   UDAIPUR RAJASTHAN 313803
21527575    100 VAGPUR KARCHCHA KALAN   UDAIPUR RAJASTHAN 313803
21527575    100 VILLAGE GORIYAN TEHSIL UDAIPURWATI DIST JHUNJHUNU  JHUJHUNU RAJASTHAN 333307


  • 답변 # 1

    열 이름이 명확하지 않으므로 일반 코드를 작성하고 있습니다.

    각 행의 목록에 있는 첫 번째 튜플의 두 번째 요소(인덱스 1, 즉 점수)로 B 열을 업데이트하고 있습니다.

    도움이 되길 바랍니다 :)

    # Each cell of column B is a list of tuples; keep element 1 of its first tuple.
    df['B'] = df['B'].map(lambda matches: matches[0][1])
    
    

    샘플 프로그램 ::

    import pandas as pd
    # Single-row sample with three columns: an id, a list holding one
    # (text, score) tuple, and a search string.
    sample_row = {
        'A': [21527575],
        'B': [[('H.NO.407,ROOM NO.310. 3RD FLOOR MAQBOOL APARTMENT APARTMENT OPP, RABIYA MASJID MANGAL BAZAR SLAP KOT THANE MAHARASHTRA 421302', 100)]],
        'C': [' H.NO.407,ROOM NO.310. 3RD FLOOR MAQBOOL APARTMENT APARTMENT OPP, RABIYA MASJID MANGAL BAZAR SLAP KOT THANE MAHARASHTRA 421302'],
    }
    data = pd.DataFrame(sample_row)
    # Replace each list in column B with element 1 of its first tuple.
    scores = data['B'].map(lambda matches: matches[0][1])
    data['B'] = scores
    print(data)
    
    

    출력 ::

        A    B                                                  C
    0  21527575  100   H.NO.407,ROOM NO.310. 3RD FLOOR MAQBOOL APARTMENT APARTMENT OPP, RABIYA MASJID MANGAL BAZAR SLAP KOT THANE MAHARASHTRA 421302
    
    

  • 답변 # 2

    데이터 프레임이 약간 불분명했습니다.

    문제가 해결되는지 확인하십시오.

    import pandas as pd
    # Sample data frame: each cell of column 'b' is a one-element list
    # containing a single tuple.
    data = pd.DataFrame({'a': [1, 2, 3], 'b': [[(1, 2)], [(2, 3)], [(3, 4)]]})
    print(data)
    # Data (printed output, kept as comments so the snippet stays runnable)
    #    a         b
    # 0  1  [(1, 2)]
    # 1  2  [(2, 3)]
    # 2  3  [(3, 4)]

    # Fix
    # x[0] is the first tuple in the list; [-1] selects the last element
    # of that tuple.
    data['b'] = data['b'].apply(lambda x: x[0][-1])
    print(data)
    # Result (printed output)
    #    a  b
    # 0  1  2
    # 1  2  3
    # 2  3  4
    
    

  • 이전 autodesk forge - {faultname} - 지정된 client_id가 api 제품에 액세스 할 수 없습니다
  • 다음 ansible을 사용하여 원격 공유 경로로 복사