aboutsummaryrefslogtreecommitdiff
path: root/OcrClient/Services/YandexOcrClient.cs
blob: 8568e38661afaed320ad8d72d3b5494784453ade (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using Newtonsoft.Json;
using OcrClient.Models;

namespace OcrClient.Services;

/// <summary>
/// <see cref="IOcrClient"/> implementation that posts an image to the <c>recognizeText</c>
/// endpoint under <c>https://ocr.api.cloud.yandex.net/ocr/v1/</c> using the <c>table</c> model
/// and converts every returned table into a two-dimensional grid of cell values.
/// </summary>
public class YandexOcrClient : IOcrClient
{
	private readonly HttpClient _httpClient;

	/// <summary>
	/// Stores the supplied client and points its <see cref="HttpClient.BaseAddress"/> at the OCR API root.
	/// </summary>
	/// <param name="httpClient">Client used for all requests; its base address is overwritten here.</param>
	/// <exception cref="ArgumentNullException"><paramref name="httpClient"/> is <c>null</c>.</exception>
	public YandexOcrClient(HttpClient httpClient)
	{
		_httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
		_httpClient.BaseAddress = new Uri("https://ocr.api.cloud.yandex.net/ocr/v1/");
	}

	/// <summary>
	/// Sends the image for recognition and returns one grid per recognized table that has cells.
	/// </summary>
	/// <param name="base64Image">Image payload placed in the request's <c>content</c> field.</param>
	/// <param name="xFolderId">Value sent in the <c>x-folder-id</c> request header.</param>
	/// <param name="apiKey">Key sent as <c>Authorization: Api-Key …</c>.</param>
	/// <returns>
	/// A grid per table, indexed <c>[row, column]</c>. Each filled cell holds a <see cref="double"/>
	/// when its text parses as a number, otherwise the text itself (or an empty string).
	/// The sequence is empty, never <c>null</c>, when the response contains no usable tables.
	/// </returns>
	/// <exception cref="HttpRequestException">The service answered with a non-success status code.</exception>
	/// <exception cref="FormatException">A table dimension or cell index in the response is not an integer.</exception>
	public async Task<IEnumerable<object[,]>> ProcessImage(string base64Image, string xFolderId, string apiKey)
	{
		// Headers go on the request, not on _httpClient.DefaultRequestHeaders: the default
		// collection is shared by every call, so Add() there would pile up duplicate
		// x-folder-id / x-data-logging-enable values and leak credentials between calls.
		using HttpRequestMessage request = new(HttpMethod.Post, "recognizeText")
		{
			Content = new StringContent(
				JsonConvert.SerializeObject(new
				{
					mimeType = "PNG",
					languageCodes = new string[] { "ru", "en" },
					model = "table",
					content = base64Image
				}),
				Encoding.UTF8,
				"application/json")
		};
		request.Headers.Authorization = new AuthenticationHeaderValue("Api-Key", apiKey);
		request.Headers.Add("x-folder-id", xFolderId);
		request.Headers.Add("x-data-logging-enable", "true");

		using HttpResponseMessage response = await _httpClient.SendAsync(request);
		response.EnsureSuccessStatusCode();

		string jsonResponse = await response.Content.ReadAsStringAsync();
		OcrResponse? deserialized = JsonConvert.DeserializeObject<OcrResponse>(jsonResponse);

		var tables = deserialized?.Result?.TextAnnotation?.Tables ?? Enumerable.Empty<Table>();

		List<object[,]> result = new();
		foreach (var table in tables)
		{
			// Tables without any cells carry no data and are skipped.
			if (table.Cells == null || table.Cells.Count == 0)
			{
				continue;
			}
			result.Add(BuildGrid(table));
		}
		return result;
	}

	/// <summary>
	/// Allocates a <c>[RowCount, ColumnCount]</c> grid for the table and places each cell at its reported position.
	/// </summary>
	private static object[,] BuildGrid(Table table)
	{
		// Dimensions and indices arrive as machine-generated strings, so parse them culture-independently.
		var invariant = System.Globalization.CultureInfo.InvariantCulture;

		int rowCount = int.Parse(table.RowCount, invariant);
		int columnCount = int.Parse(table.ColumnCount, invariant);
		object[,] grid = new object[rowCount, columnCount];

		foreach (Cell cell in table.Cells)
		{
			int rowIndex = int.Parse(cell.RowIndex, invariant);
			int columnIndex = int.Parse(cell.ColumnIndex, invariant);

			// Cell text is OCR output; numeric detection deliberately keeps using the
			// current culture, matching how numbers were recognized before.
			grid[rowIndex, columnIndex] = double.TryParse(cell.Text, out double number) ?
				number : cell.Text ?? string.Empty;
		}
		return grid;
	}
}