▷Elimine el texto de fondo y el ruido de una imagen mediante el procesamiento de imágenes con OpenCV ✔️ Foro Ayuda 【 2024 】

Solución:

Aquí hay dos enfoques posibles y un método para corregir texto distorsionado:

Método 1: Operaciones morfológicas + filtrado de contornos

Obtenga una imagen binaria. Cargue la imagen, escala de grises, luego el umbral de Otsu.
Elimina los contornos del texto. Cree un núcleo rectangular con cv2.getStructuringElement y luego realizar operaciones morfológicas para eliminar el ruido.
Filtra y elimina los pequeños ruidos. Encuentre contornos y filtre usando el área del contorno para eliminar las partículas pequeñas. Eliminamos eficazmente el ruido rellenando el contorno con cv2.drawContours
Realice OCR. Invertimos la imagen y luego aplicamos un ligero desenfoque gaussiano. Luego hacemos OCR usando Pytesseract con el --psm 6 opción de configuración para tratar la imagen como un solo bloque de texto. Observe que Tesseract mejora la calidad de otros métodos para mejorar la detección y las opciones de configuración de Pytesseract para configuraciones adicionales.

Imagen de entrada -> Binario -> Apertura de Morph

Filtrado de áreas de contorno -> Invertir -> Aplicar desenfoque para obtener resultado

Resultado de OCR

YabVzu

Código

import cv2
import pytesseract
import numpy as np

pytesseract.pytesseract.tesseract_cmd = r"C:Program FilesTesseract-OCRtesseract.exe"

# Load image, grayscale, Otsu's threshold
image = cv2.imread('2.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Morph open to remove noise
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

# Find contours and remove small noise
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area < 50:
        cv2.drawContours(opening, [c], -1, 0, -1)

# Invert and apply slight Gaussian blur
result = 255 - opening
result = cv2.GaussianBlur(result, (3,3), 0)

# Perform OCR
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('result', result)
cv2.waitKey()

Método # 2: Segmentación de color

Con la observación de que el texto que se desea extraer tiene un contraste distinguible del ruido en la imagen, podemos usar el umbral de color para aislar el texto. La idea es convertir al formato HSV y luego al umbral de color para obtener una máscara utilizando un rango de color inferior / superior. Desde donde usamos el mismo proceso hasta OCR con Pytesseract.

Imagen de entrada -> Máscara -> Resultado

Código

import cv2
import pytesseract
import numpy as np

pytesseract.pytesseract.tesseract_cmd = r"C:Program FilesTesseract-OCRtesseract.exe"

# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('2.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)

# Invert image and OCR
invert = 255 - mask
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)

cv2.imshow('mask', mask)
cv2.imshow('invert', invert)
cv2.waitKey()

Corregir texto distorsionado

OCR funciona mejor cuando la imagen es horizontal. Para asegurarnos de que el texto tenga un formato ideal para OCR, podemos realizar una transformación de perspectiva. Después de eliminar todo el ruido para aislar el texto, podemos realizar una transformación cercana para combinar contornos de texto individuales en un solo contorno. Desde aquí podemos encontrar el cuadro delimitador girado usando cv2.minAreaRect y luego realice una transformación de perspectiva de cuatro puntos usando imutils.perspective.four_point_transform. Continuando con la máscara limpia, aquí están los resultados:

Máscara -> Morph cerrar -> Cuadro delimitador girado detectado -> Resultado

Salida con la otra imagen

Código actualizado para incluir transformación de perspectiva

import cv2
import pytesseract
import numpy as np
from imutils.perspective import four_point_transform

pytesseract.pytesseract.tesseract_cmd = r"C:Program FilesTesseract-OCRtesseract.exe"

# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('1.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)

# Morph close to connect individual text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)

# Find rotated bounding box then perspective transform
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
rect = cv2.minAreaRect(cnts[0])
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(image,[box],0,(36,255,12),2)
warped = four_point_transform(255 - mask, box.reshape(4, 2))

# OCR
data = pytesseract.image_to_string(warped, lang='eng', config='--psm 6')
print(data)

cv2.imshow('mask', mask)
cv2.imshow('close', close)
cv2.imshow('warped', warped)
cv2.imshow('image', image)
cv2.waitKey()

Nota: El rango de umbral de color se determinó utilizando este script de umbral de HSV

import cv2
import numpy as np

def nothing(x):
    pass

# Load image
image = cv2.imread('2.png')

# Create a window
cv2.namedWindow('image')

# Create trackbars for color change
# Hue is from 0-179 for Opencv
cv2.createTrackbar('HMin', 'image', 0, 179, nothing)
cv2.createTrackbar('SMin', 'image', 0, 255, nothing)
cv2.createTrackbar('VMin', 'image', 0, 255, nothing)
cv2.createTrackbar('HMax', 'image', 0, 179, nothing)
cv2.createTrackbar('SMax', 'image', 0, 255, nothing)
cv2.createTrackbar('VMax', 'image', 0, 255, nothing)

# Set default value for Max HSV trackbars
cv2.setTrackbarPos('HMax', 'image', 179)
cv2.setTrackbarPos('SMax', 'image', 255)
cv2.setTrackbarPos('VMax', 'image', 255)

# Initialize HSV min/max values
hMin = sMin = vMin = hMax = sMax = vMax = 0
phMin = psMin = pvMin = phMax = psMax = pvMax = 0

while(1):
    # Get current positions of all trackbars
    hMin = cv2.getTrackbarPos('HMin', 'image')
    sMin = cv2.getTrackbarPos('SMin', 'image')
    vMin = cv2.getTrackbarPos('VMin', 'image')
    hMax = cv2.getTrackbarPos('HMax', 'image')
    sMax = cv2.getTrackbarPos('SMax', 'image')
    vMax = cv2.getTrackbarPos('VMax', 'image')

    # Set minimum and maximum HSV values to display
    lower = np.array([hMin, sMin, vMin])
    upper = np.array([hMax, sMax, vMax])

    # Convert to HSV format and color threshold
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    result = cv2.bitwise_and(image, image, mask=mask)

    # Print if there is a change in HSV value
    if((phMin != hMin) | (psMin != sMin) | (pvMin != vMin) | (phMax != hMax) | (psMax != sMax) | (pvMax != vMax) ):
        print("(hMin = %d , sMin = %d, vMin = %d), (hMax = %d , sMax = %d, vMax = %d)" % (hMin , sMin , vMin, hMax, sMax , vMax))
        phMin = hMin
        psMin = sMin
        pvMin = vMin
        phMax = hMax
        psMax = sMax
        pvMax = vMax

    # Display result image
    cv2.imshow('image', result)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()

Su código produce mejores resultados que este. Aquí, establezco un umbral para upperb y lowerb valores basados en histograma CDF valores y un umbral. prensa ESC para obtener la siguiente imagen.

Este código es innecesariamente complejo y debe optimizarse de varias formas. El código se puede reordenar para omitir algunos pasos. Lo guardé porque algunas partes pueden ayudar a otras. Se puede eliminar parte del ruido existente manteniendo el contorno con el área por encima de cierto umbral. Cualquier sugerencia sobre otro método de reducción de ruido es bienvenida.

Aquí se puede encontrar un código similar más fácil para obtener 4 puntos de esquina para la transformación de perspectiva,

¿Detección precisa de esquinas?

Descripción del código:

Imagen original
Filtro de mediana (eliminación de ruido e identificación de ROI)
Umbral OTSU
Invertir imagen
Utilice la imagen en blanco y negro invertida como máscara para mantener la mayor parte del ROI como parte de la imagen original
Dilatación para mayor hallazgo de contorno
Marque el ROI dibujando rectángulos y puntos de esquina en la imagen original
Enderece el ROI y extráigalo
Filtro de mediana
Umbral OTSU
Invertir imagen para máscara
Enmascare la imagen recta para eliminar la mayor parte del ruido además del texto
En rango se usa con valores lowerb y upperb del histograma cdf como se mencionó anteriormente para reducir aún más el ruido
Tal vez erosionar la imagen en este paso produzca un resultado algo aceptable. En cambio, aquí esa imagen se dilata nuevamente y se usa como una máscara para obtener un ROI menos ruidoso de la imagen transformada en perspectiva.

Código:

## Press ESC button to get next image

import cv2
import cv2 as cv
import numpy as np


frame = cv2.imread('extra/c1.png')
#frame = cv2.imread('extra/c2.png')


## keeping a copy of original
print(frame.shape)
original_frame = frame.copy()
original_frame2 = frame.copy()


## Show the original image
winName = 'Original'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)



## Apply median blur
frame = cv2.medianBlur(frame,9)


## Show the original image
winName = 'Median Blur'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)


#kernel = np.ones((5,5),np.uint8)
#frame = cv2.dilate(frame,kernel,iterations = 1)



# Otsu's thresholding
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
ret2,thresh_n = cv.threshold(frame,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
frame = thresh_n


## Show the original image
winName = 'Otsu Thresholding'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)




## invert color
frame = cv2.bitwise_not(frame)

## Show the original image
winName = 'Invert Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)


## Dilate image
kernel = np.ones((5,5),np.uint8)
frame = cv2.dilate(frame,kernel,iterations = 1)


##
## Show the original image
winName = 'SUB'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
img_gray = cv2.cvtColor(original_frame, cv2.COLOR_BGR2GRAY)
cv.imshow(winName, img_gray & frame)
cv.waitKey(0)


## Show the original image
winName = 'Dilate Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)


## Get largest contour from contours
contours, hierarchy = cv2.findContours(frame, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)


## Get minimum area rectangle and corner points
rect = cv2.minAreaRect(max(contours, key = cv2.contourArea))
print(rect)
box = cv2.boxPoints(rect)
print(box)


## Sorted points by x and y
## Not used in this code
print(sorted(box , key=lambda k: [k[0], k[1]]))



## draw anchor points on corner
frame = original_frame.copy()
z = 6
for b in box:
    cv2.circle(frame, tuple(b), z, 255, -1)


## show original image with corners
box2 = np.int0(box)
cv2.drawContours(frame,[box2],0,(0,0,255), 2)
cv2.imshow('Detected Corners',frame)
cv2.waitKey(0)
cv2.destroyAllWindows()



## https://stackoverflow.com/questions/11627362/how-to-straighten-a-rotated-rectangle-area-of-an-image-using-opencv-in-python
def subimage(image, center, theta, width, height):
   shape = ( image.shape[1], image.shape[0] ) # cv2.warpAffine expects shape in (length, height)

   matrix = cv2.getRotationMatrix2D( center=center, angle=theta, scale=1 )
   image = cv2.warpAffine( src=image, M=matrix, dsize=shape )

   x = int(center[0] - width / 2)
   y = int(center[1] - height / 2)

   image = image[ y:y+height, x:x+width ]

   return image



## Show the original image
winName = 'Dilate Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)


## use the calculated rectangle attributes to rotate and extract it
frame = subimage(original_frame, center=rect[0], theta=int(rect[2]), width=int(rect[1][0]), height=int(rect[1][1]))
original_frame = frame.copy()
cv.imshow(winName, frame)
cv.waitKey(0)

perspective_transformed_image = frame.copy()



## Apply median blur
frame = cv2.medianBlur(frame,11)


## Show the original image
winName = 'Median Blur'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)


#kernel = np.ones((5,5),np.uint8)
#frame = cv2.dilate(frame,kernel,iterations = 1)



# Otsu's thresholding
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
ret2,thresh_n = cv.threshold(frame,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
frame = thresh_n


## Show the original image
winName = 'Otsu Thresholding'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)



## invert color
frame = cv2.bitwise_not(frame)

## Show the original image
winName = 'Invert Image'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)


## Dilate image
kernel = np.ones((5,5),np.uint8)
frame = cv2.dilate(frame,kernel,iterations = 1)

##
## Show the original image
winName = 'SUB'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
img_gray = cv2.cvtColor(original_frame, cv2.COLOR_BGR2GRAY)
frame = img_gray & frame
frame[np.where(frame==0)] = 255
cv.imshow(winName, frame)
cv.waitKey(0)





hist,bins = np.histogram(frame.flatten(),256,[0,256])

cdf = hist.cumsum()
cdf_normalized = cdf * hist.max()/ cdf.max()
print(cdf)
print(cdf_normalized)
hist_image = frame.copy()




## two decresing range algorithm
low_index = -1
for i in range(0, 256):
   if cdf[i] > 0:
      low_index = i
      break
print(low_index)

tol = 0
tol_limit = 20
broken_index = -1
past_val = cdf[low_index] - cdf[low_index + 1]
for i in range(low_index + 1, 255):
   cur_val = cdf[i] - cdf[i+1]
   if tol > tol_limit:
      broken_index = i
      break
   if cur_val < past_val:
      tol += 1
   past_val = cur_val

print(broken_index)




##
lower = min(frame.flatten())
upper = max(frame.flatten())
print(min(frame.flatten()))
print(max(frame.flatten()))

#img_rgb_inrange = cv2.inRange(frame_HSV, np.array([lower,lower,lower]), np.array([upper,upper,upper]))
img_rgb_inrange = cv2.inRange(frame, (low_index), (broken_index))
neg_rgb_image = ~img_rgb_inrange
## Show the original image
winName = 'Final'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, neg_rgb_image)
cv.waitKey(0)


kernel = np.ones((3,3),np.uint8)
frame = cv2.erode(neg_rgb_image,kernel,iterations = 1)
winName = 'Final Dilate'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
#cv.resizeWindow(winName, 800, 800)
cv.imshow(winName, frame)
cv.waitKey(0)


##
winName = 'Final Subtracted'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
img2 = np.zeros_like(perspective_transformed_image)
img2[:,:,0] = frame
img2[:,:,1] = frame
img2[:,:,2] = frame
frame = img2
cv.imshow(winName, perspective_transformed_image | frame)
cv.waitKey(0)


##
import matplotlib.pyplot as plt
plt.plot(cdf_normalized, color = 'b')
plt.hist(hist_image.flatten(),256,[0,256], color = 'r')
plt.xlim([0,256])
plt.legend(('cdf','histogram'), loc = 'upper left')
plt.show()

1. Filtro de mediana: