# master
# / application / trainTicket.py
#
# trainTicket.py @6d134f7 raw · history · blame

"""
火车票
"""
from apphelper.image import union_rbox
import re
class trainTicket:
    """
    Structured field extraction for OCR'd train tickets.

    Constructing an instance runs every extractor over the OCR text lines
    and collects the matches in ``self.res``. Keys that may be present
    (values are strings): '出发' (departure station), '到达' (arrival
    station), '车次' (train number), '日期' (date as Y-M-D), '时间'
    (time as H:M), '车票价格' (ticket price) and '姓名' (name). A key is
    absent when its pattern was not found.
    """

    # A run of CJK characters ending in '站' (station).
    _STATION_RE = re.compile('[一-龥]+站')
    # Whatever sits between two station names (the train number).
    # NOTE(review): the second character class is '[][一-龥]', so it also
    # admits a literal ']' or '['; kept verbatim from the original pattern.
    _TRAIN_NO_RE = re.compile('[一-龥]+站(.+?)[][一-龥]+站')
    _DATE_RE = re.compile('[0-9]{1,4}年[0-9]{1,2}月[0-9]{1,2}日')
    _CLOCK_RE = re.compile('[0-9]{1,2}:[0-9]{1,2}')
    # Tried in this order; the first pattern with a match wins.
    # The '.' is intentionally left unescaped (matches any one character),
    # exactly as in the original patterns.
    _PRICE_RES = tuple(re.compile(p) for p in (
        '¥[0-9]{1,4}.[0-9]{1,2}元',
        '[0-9]{1,4}.[0-9]{1,2}元',
        '[0-9]{1,6}元',
        '¥[0-9]{1,4}.[0-9]{1,2}',
    ))
    # Raw string: '\d' and '\*' are regex escapes, not string escapes.
    _NAME_RE = re.compile(r"\d*\*\d*([一-龥]{1,4})")

    def __init__(self, result):
        """
        Run all field extractors over the OCR output.

        :param result: raw OCR result; it is passed through the project
            helper ``union_rbox(result, 0.2)`` (presumably merging nearby
            boxes -- confirm against apphelper.image). Each resulting item
            is read as a mapping with a 'text' entry.
        """
        self.result = union_rbox(result, 0.2)
        self.N = len(self.result)
        self.res = {}
        self.station()
        self.time()
        self.price()
        self.full_name()

    @staticmethod
    def _clean(text):
        """
        Return *text* with every whitespace character removed.

        The original code called ``.replace(' ', '')`` twice in a row; the
        second call presumably targeted another kind of space character
        (TODO confirm). Dropping all whitespace covers both cases.
        """
        return ''.join(text.split())

    def _lines(self):
        """Yield the whitespace-free text of each OCR item, in order."""
        for box in self.result:
            yield self._clean(box['text'])

    def station(self):
        """
        Extract departure station, arrival station and train number.

        Example: '安顺站K492贵阳站' gives 出发='安顺', 到达='贵阳',
        车次='K492'. Lines are scanned in order and scanning stops at the
        first line that yields any of these fields.
        """
        for txt in self._lines():
            found = {}
            names = self._STATION_RE.findall(txt)
            # Both stations are needed; a line holding a single '...站'
            # used to raise IndexError when the second one was indexed.
            if len(names) >= 2:
                found['出发'] = names[0].replace('站', '')
                found['到达'] = names[1].replace('站', '')
            trains = self._TRAIN_NO_RE.findall(txt)
            if trains:
                found['车次'] = trains[0]
            if found:
                self.res.update(found)
                break

    def time(self):
        """
        Extract the date and the time.

        Both must appear on the same line: a line with a date but no
        'H:M' time is skipped and nothing is recorded for it. Scanning
        stops at the first line that has both.
        """
        for txt in self._lines():
            dates = self._DATE_RE.findall(txt)
            if not dates:
                continue
            clocks = self._CLOCK_RE.findall(txt)
            if not clocks:
                continue
            # '2019年01月02日' -> '2019-01-02'
            day = dates[0].replace('年', '-').replace('月', '-').replace('日', '')
            self.res.update({'日期': day, '时间': clocks[0]})
            break

    def price(self):
        """
        Extract the ticket price.

        Uses the first line on which any price pattern matches; the
        currency markers '¥' and '元' are stripped from the stored value.
        """
        for txt in self._lines():
            for pattern in self._PRICE_RES:
                hits = pattern.findall(txt)
                if hits:
                    amount = hits[0].replace('¥', '').replace('元', '')
                    self.res.update({'车票价格': amount})
                    return

    def full_name(self):
        """
        Extract the name.

        Matches up to four CJK characters that follow a digit run
        containing a '*' (e.g. '...****1234张三' gives '张三'). There is no
        early exit, so when several lines match, the last one is kept.
        """
        for txt in self._lines():
            names = self._NAME_RE.findall(txt)
            if names:
                self.res.update({'姓名': names[0]})