00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #define __ASSEMBLY__
00032 #include <linux/linkage.h>
00033
00034 #define Width 8(%ebp)
00035 #define Height 12(%ebp)
00036
00037
00038 #define Src2 16(%ebp)
00039 #define Dst2 20(%ebp)
00040
00041
00042 #define SrcY 16(%ebp)
00043 #define SrcU 20(%ebp)
00044 #define SrcV 24(%ebp)
00045 #define Dst4 28(%ebp)
00046
00047
00048 #define Src4 16(%ebp)
00049 #define DstY 20(%ebp)
00050 #define DstU 24(%ebp)
00051 #define DstV 28(%ebp)
00052
00053
00054
00055
00056
00057
00058
00059 #define PixelBuffer -64(%ebp)
00060 #define Uptr -68(%ebp)
00061 #define Vptr -72(%ebp)
00062
00063 .text
00064
00065
00066
00067
00068
00069
00070
00071
00072
# Validate the arguments of a one-source / one-destination conversion.
# In:       %ebp = frame of the calling conversion routine
# Out:      %esi = source pointer, %edi = destination pointer
#           carry clear when everything is usable, carry set otherwise
# Control continues in the shared width/height check.
test_param_2:
        mov     Src2, %esi
        mov     Dst2, %edi

        test    %esi, %esi              # source NULL?
        jz      param_fail
        test    %edi, %edi              # destination NULL?
        jz      param_fail

        jmp     test_width_height
00083
00084
# Validate the arguments of a three-plane-source / one-destination conversion.
# In:       %ebp = frame of the calling conversion routine
# Out:      %esi = Y source plane, %edi = destination
#           Uptr / Vptr frame slots = U and V source planes
#           carry clear on success, carry set on any NULL pointer
# Control continues in the shared width/height check.
test_param_31:
        mov     Dst4, %edi
        test    %edi, %edi              # destination NULL?
        jz      param_fail

        mov     SrcV, %esi
        test    %esi, %esi              # V plane NULL?
        jz      param_fail
        mov     %esi, Vptr

        mov     SrcU, %esi
        test    %esi, %esi              # U plane NULL?
        jz      param_fail
        mov     %esi, Uptr

        mov     SrcY, %esi
        test    %esi, %esi              # Y plane NULL?
        jz      param_fail

        jmp     test_width_height
00105
00106
# Validate the arguments of a one-source / three-plane-destination conversion.
# In:       %ebp = frame of the calling conversion routine
# Out:      %esi = source, %edi = Y destination plane
#           Uptr / Vptr frame slots = U and V destination planes
#           carry clear on success, carry set on any NULL pointer
# Control continues in the shared width/height check.
test_param_13:
        mov     Src4, %esi
        test    %esi, %esi              # source NULL?
        jz      param_fail

        mov     DstV, %edi
        test    %edi, %edi              # V plane NULL?
        jz      param_fail
        mov     %edi, Vptr

        mov     DstU, %edi
        test    %edi, %edi              # U plane NULL?
        jz      param_fail
        mov     %edi, Uptr

        mov     DstY, %edi
        test    %edi, %edi              # Y plane NULL?
        jz      param_fail

        jmp     test_width_height

        nop                             # never executed (follows an unconditional jump)
00129
# Shared tail of the test_param_* routines: check the image dimensions.
# In:       %ebp = frame of the calling conversion routine
# Out:      carry clear when Width is a positive multiple of 4 and
#           Height is a positive even number; carry set otherwise
# The compares are signed on purpose: an unsigned "below or equal" test
# against zero only catches the value zero, so a negative dimension
# would slip through and drive the pixel loops for billions of rounds.
test_width_height:
        cmpl    $0, Width
        jle     param_fail              # zero or negative width
        testl   $3, Width               # multiple of 4?
        jnz     param_fail              # Nope...

        cmpl    $0, Height              # explicit long size: no register operand here
        jle     param_fail              # zero or negative height
        testl   $1, Height              # Odd no. of lines?
        jnz     param_fail              # Aye

param_ok:
        clc                             # Success: clear carry
        ret

param_fail:
        stc                             # Fail: set carry
        ret
00151
00152
00153
# Fill PixelBuffer with four grey pixels built from four Y samples.
# Every pixel occupies four dwords; the first three each receive
# Y * 256 (8 fractional bits) and the fourth one is cleared. Clearing it
# matters because limit_pixels and push_rgb32 read all sixteen dwords;
# leaving the slot untouched would copy stale stack contents to the output.
# In:       %eax = four Y samples, Y0 in the low byte (Y3Y2Y1Y0)
#           %ecx = pixel counter; the loop stops once its low two bits
#                  are zero, i.e. after four rounds when it enters as a
#                  multiple of 4 (test_width_height enforces that for Width)
# Out:      %ecx lowered by the number of rounds executed
# Destroys: %eax, %edx, %edi, flags
# NOTE(review): stosl assumes the direction flag is clear - confirm callers.
expand_4_y:
        mov     %eax, %edx              # Keep in edx (we need eax)
        lea     PixelBuffer, %edi

0:                                      # one round per Y sample
        movzbl  %dl, %eax               # next sample, zero extended
        shl     $8, %eax                # 8 bit precision

        stosl                           # first colour slot
        stosl                           # second colour slot
        stosl                           # third colour slot
        movl    $0, (%edi)              # fourth slot: defined value instead of stack residue
        add     $4, %edi

        shr     $8, %edx                # next Y

        dec     %ecx
        test    $3, %ecx
        jnz     0b

        ret                             # from expand_4_y
00179
# One chroma contribution: sign extend the sample held in \reg, multiply
# by \coef and apply the product with \op (add or sub) to the two
# PixelBuffer dwords at byte offsets \lo and \hi from %edi.
# The unsigned mul is fine here: only the low 32 bits of the product are
# used, and those do not depend on signedness.
.macro  uv_term reg, coef, op, lo, hi
        movsbl  \reg, %eax
        mov     $\coef, %edx
        mul     %edx
        \op     %eax, \lo\()(%edi)
        \op     %eax, \hi\()(%edi)
.endm

# Add the colour terms to the grey values already stored in PixelBuffer.
# Sample 0 of each plane is applied to pixels 0 and 1, sample 1 to
# pixels 2 and 3. Dword 0 of a pixel gets the Vr term, dword 1 the Vg and
# Ug terms, dword 2 the Ub term.
# In:       %ebx = U1U0V1V0 (V0 in the low byte)
# Out:      PixelBuffer updated in place
# Destroys: %edi, %ebx, %eax, %edx, flags
expand_4_uv:
        lea     PixelBuffer, %edi       # reset pointer

        sub     $128, %bl               # V0, centred on zero
        uv_term %bl, 359, add, 0x00, 0x10       # Vr
        uv_term %bl, 183, sub, 0x04, 0x14       # Vg

        sub     $128, %bh               # V1, centred on zero
        uv_term %bh, 359, add, 0x20, 0x30       # Vr
        uv_term %bh, 183, sub, 0x24, 0x34       # Vg

        bswap   %ebx                    # U0 now in %bh, U1 in %bl

        sub     $128, %bh               # U0, centred on zero
        uv_term %bh, 88, sub, 0x04, 0x14        # Ug
        uv_term %bh, 454, add, 0x08, 0x18       # Ub

        sub     $128, %bl               # U1, centred on zero
        uv_term %bl, 88, sub, 0x24, 0x34        # Ug
        uv_term %bl, 454, add, 0x28, 0x38       # Ub
        ret                             # expand_4_uv
00245
00246
00247
# Convert four pixels of interleaved 4:2:0 input into PixelBuffer.
# A line consists of groups of four Y bytes followed by two chroma bytes;
# the chroma bytes are U when the Height counter is even and V when it is
# odd. The missing plane is fetched from the neighbouring line, which is
# 1.5 * Width bytes away.
# In:       %esi = source position, %ecx = pixel counter, Height = line counter
# Out:      %esi advanced by 6 bytes, %ecx lowered via expand_4_y
# Destroys: %eax, %ebx, %edx, flags (%edi is preserved)
do_four_yuvi:
        push    %edi

        lodsl                           # four Y samples
        call    expand_4_y

        mov     Width, %ebx
        lea     (%ebx,%ebx,2), %ebx     # Width * 3
        shr     $1, %ebx                # Width * 1.5 = distance between lines

        testl   $1, Height
        jnz     .Lyuvi_odd

        # Even line: U follows the Y bytes, V sits one line further down
        lodsw                           # U pair, %esi now past it
        shl     $16, %eax               # U pair into the high word
        mov     -2(%esi,%ebx), %ax      # V pair at the same spot of the next line
        jmp     .Lyuvi_expand

.Lyuvi_odd:
        # Odd line: V follows the Y bytes, U sits one line further up
        neg     %ebx
        movzwl  (%esi,%ebx), %eax       # U pair from the previous line
        shl     $16, %eax               # U pair into the high word
        lodsw                           # V pair into the low word

.Lyuvi_expand:
        mov     %eax, %ebx              # U1U0V1V0, as expand_4_uv expects
        call    expand_4_uv

        pop     %edi
        ret                             # from do_four_yuvi
00292
00293
# Convert four pixels of planar input into PixelBuffer.
# In:       %esi = position in the Y plane, %ecx = pixel counter
#           Uptr / Vptr frame slots = positions in the U and V planes
# Out:      %esi advanced by 4, Uptr and Vptr advanced by 2 each,
#           %ecx lowered via expand_4_y
# Destroys: %eax, %ebx, %edx, flags (%edi is preserved)
do_four_yuvp:
        push    %edi

        # Luma part is the same as for the interleaved layout
        lodsl                           # four Y samples
        call    expand_4_y

        # Collect two U and two V samples as U1U0V1V0
        mov     Uptr, %ebx
        movzwl  (%ebx), %eax            # U pair
        shl     $16, %eax               # ... into the high word
        addl    $2, Uptr

        mov     Vptr, %ebx
        mov     (%ebx), %ax             # V pair into the low word
        addl    $2, Vptr

        mov     %eax, %ebx
        call    expand_4_uv

        pop     %edi
        ret
00320
00321
# Convert four pixels of packed YUYV input (eight bytes) into PixelBuffer.
# Register pictures in the comments list bytes from high to low.
# In:       %esi = source position, %ecx = pixel counter
# Out:      %esi advanced by 8, %ecx lowered via expand_4_y
# Destroys: %eax, %ebx, %edx, flags (%edi is preserved)
do_four_yuyv:
        push    %edi                    # saved, so %edi can serve as scratch below

        lodsl                           # v0y1u0y0
        mov     %eax, %ebx
        bswap   %ebx                    # y0u0y1v0
        mov     %bh, %ah                # v0y1y1y0
        movzwl  %ax, %edi               # ____y1y0, parked for later
        and     $0x00ff00ff, %ebx       # __u0__v0

        lodsl                           # v1y3u1y2
        mov     %eax, %edx
        rol     $16, %eax               # u1y2v1y3
        and     $0xff00ff00, %eax       # u1__v1__
        or      %eax, %ebx              # u1u0v1v0

        mov     %dl, %dh                # v1y3y2y2
        shl     $8, %edx                # y3y2y2__
        mov     %di, %dx                # y3y2y1y0

        mov     %edx, %eax
        call    expand_4_y
        call    expand_4_uv

        pop     %edi
        ret
00348
# Clamp all sixteen dwords of PixelBuffer to the range 0 .. 0xff00
# (signed comparison), so that a later shift by 8 yields a valid byte.
# In:       PixelBuffer filled
# Out:      PixelBuffer clamped in place
# Destroys: %eax, flags (%ecx and %edi are preserved)
limit_pixels:
        push    %edi
        push    %ecx
        lea     PixelBuffer, %edi
        mov     $16, %ecx

.Llimit_next:
        mov     (%edi), %eax
        test    %eax, %eax
        js      .Llimit_floor           # negative -> 0
        cmp     $0xff00, %eax
        jle     .Llimit_step            # already in range, leave it alone
        mov     $0xff00, %eax           # too large -> maximum
        jmp     .Llimit_store

.Llimit_floor:
        xor     %eax, %eax

.Llimit_store:
        mov     %eax, (%edi)

.Llimit_step:
        add     $4, %edi
        dec     %ecx
        jnz     .Llimit_next

        pop     %ecx
        pop     %edi
        ret                             # from limit_pixels
00378
00379
00380
00381
00382
00383
# Write the four pixels in PixelBuffer as 3-byte R, G, B groups.
# The output byte is bits 8..15 of each dword, i.e. the byte at offset 1
# of the little-endian dword, so it is fetched directly.
# In:       %edi = output position
# Out:      %edi advanced by 12
# Destroys: %eax, flags (%ecx and %esi are preserved)
push_rgb24:
        push    %ecx
        push    %esi
        lea     PixelBuffer, %esi
        mov     $4, %ecx

.Lrgb24_pixel:
        mov     1(%esi), %al
        mov     %al, (%edi)             # Red
        mov     5(%esi), %al
        mov     %al, 1(%edi)            # Green
        mov     9(%esi), %al
        mov     %al, 2(%edi)            # Blue
        add     $16, %esi               # next pixel, fourth dword is not emitted
        add     $3, %edi
        dec     %ecx
        jnz     .Lrgb24_pixel

        pop     %esi
        pop     %ecx
        ret
00404
00405
# Write the four pixels in PixelBuffer as 3-byte B, G, R groups.
# The output byte is bits 8..15 of each dword, i.e. the byte at offset 1
# of the little-endian dword, so it is fetched directly.
# In:       %edi = output position
# Out:      %edi advanced by 12
# Destroys: %eax, flags (%ecx and %esi are preserved)
push_bgr24:
        push    %ecx
        push    %esi
        lea     PixelBuffer, %esi
        mov     $4, %ecx

.Lbgr24_pixel:
        mov     1(%esi), %al
        mov     %al, 2(%edi)            # Red
        mov     5(%esi), %al
        mov     %al, 1(%edi)            # Green
        mov     9(%esi), %al
        mov     %al, (%edi)             # Blue
        add     $16, %esi               # next pixel, fourth dword is not emitted
        add     $3, %edi
        dec     %ecx
        jnz     .Lbgr24_pixel

        pop     %esi
        pop     %ecx
        ret
00426
00427
# Write all sixteen PixelBuffer dwords as one byte each (bits 8..15),
# giving four 4-byte pixels in buffer order: red, green, blue, fourth slot.
# In:       %edi = output position
# Out:      %edi advanced by 16
# Destroys: %eax, flags (%ecx and %esi are preserved)
push_rgb32:
        push    %ecx
        push    %esi
        lea     PixelBuffer, %esi
        mov     $16, %ecx

.Lrgb32_byte:
        mov     1(%esi), %al            # bits 8..15 of the dword
        mov     %al, (%edi)
        add     $4, %esi
        inc     %edi
        dec     %ecx
        jnz     .Lrgb32_byte

        pop     %esi
        pop     %ecx
        ret
00440
00441
00442
# Write the four pixels in PixelBuffer as 4-byte groups holding B, G, R in
# the first three bytes. The fourth output byte of every group is skipped
# and keeps whatever the destination held before.
# In:       %edi = output position
# Out:      %edi advanced by 16
# Destroys: %eax, flags (%ecx and %esi are preserved)
push_bgr32:
        push    %ecx
        push    %esi
        lea     PixelBuffer, %esi
        mov     $4, %ecx

.Lbgr32_pixel:
        mov     1(%esi), %al            # red, bits 8..15 of the dword
        mov     %al, 2(%edi)
        mov     5(%esi), %al            # green
        mov     %al, 1(%edi)
        mov     9(%esi), %al            # blue
        mov     %al, (%edi)
        add     $16, %esi
        add     $4, %edi
        dec     %ecx
        jnz     .Lbgr32_pixel

        pop     %esi
        pop     %ecx
        ret
00464
00465
00466
00467
00468
00469
00470
# Interleaved 4:2:0 -> 24 bit RGB.
# Stack arguments (via the frame macros): Width, Height, Src2, Dst2.
# Returns without writing anything when test_param_2 rejects the arguments.
# The Height argument slot doubles as the line counter and ends at zero.
ENTRY(ccvt_420i_rgb24)
        push    %ebp
        mov     %esp, %ebp
        sub     $72, %esp               # room for PixelBuffer, Uptr, Vptr
        push    %ebx
        push    %esi
        push    %edi

        call    test_param_2            # sets %esi / %edi
        jc      .Li420_rgb24_done

.Li420_rgb24_row:
        mov     Width, %ecx             # pixels left on this line
.Li420_rgb24_quad:
        call    do_four_yuvi            # lowers %ecx by 4
        call    limit_pixels
        call    push_rgb24

        test    %ecx, %ecx              # end of line?
        jnz     .Li420_rgb24_quad
        decl    Height                  # yes; one line fewer to go
        jnz     .Li420_rgb24_row

.Li420_rgb24_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp
        pop     %ebp
        ret
00495
00496
00497
# Interleaved 4:2:0 -> 24 bit BGR.
# Stack arguments (via the frame macros): Width, Height, Src2, Dst2.
# Returns without writing anything when test_param_2 rejects the arguments.
# The Height argument slot doubles as the line counter and ends at zero.
ENTRY(ccvt_420i_bgr24)
        push    %ebp
        mov     %esp, %ebp
        sub     $72, %esp               # room for PixelBuffer, Uptr, Vptr
        push    %ebx
        push    %esi
        push    %edi

        call    test_param_2            # sets %esi / %edi
        jc      .Li420_bgr24_done

.Li420_bgr24_row:
        mov     Width, %ecx             # pixels left on this line
.Li420_bgr24_quad:
        call    do_four_yuvi            # lowers %ecx by 4
        call    limit_pixels
        call    push_bgr24

        test    %ecx, %ecx              # end of line?
        jnz     .Li420_bgr24_quad
        decl    Height                  # yes; one line fewer to go
        jnz     .Li420_bgr24_row

.Li420_bgr24_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp
        pop     %ebp
        ret
00522
00523
00524
00525
# Interleaved 4:2:0 -> 32 bit RGB (four bytes per pixel).
# Stack arguments (via the frame macros): Width, Height, Src2, Dst2.
# Returns without writing anything when test_param_2 rejects the arguments.
# The Height argument slot doubles as the line counter and ends at zero.
ENTRY(ccvt_420i_rgb32)
        push    %ebp
        mov     %esp, %ebp
        sub     $72, %esp               # room for PixelBuffer, Uptr, Vptr
        push    %ebx
        push    %esi
        push    %edi

        call    test_param_2            # sets %esi / %edi
        jc      .Li420_rgb32_done

.Li420_rgb32_row:
        mov     Width, %ecx             # pixels left on this line
.Li420_rgb32_quad:
        call    do_four_yuvi            # lowers %ecx by 4
        call    limit_pixels
        call    push_rgb32

        test    %ecx, %ecx              # end of line?
        jnz     .Li420_rgb32_quad
        decl    Height                  # yes; one line fewer to go
        jnz     .Li420_rgb32_row

.Li420_rgb32_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp
        pop     %ebp
        ret
00550
00551
00552
# Interleaved 4:2:0 -> 32 bit BGR (four bytes per pixel).
# Stack arguments (via the frame macros): Width, Height, Src2, Dst2.
# Returns without writing anything when test_param_2 rejects the arguments.
# The Height argument slot doubles as the line counter and ends at zero.
ENTRY(ccvt_420i_bgr32)
        push    %ebp
        mov     %esp, %ebp
        sub     $72, %esp               # room for PixelBuffer, Uptr, Vptr
        push    %ebx
        push    %esi
        push    %edi

        call    test_param_2            # sets %esi / %edi
        jc      .Li420_bgr32_done

.Li420_bgr32_row:
        mov     Width, %ecx             # pixels left on this line
.Li420_bgr32_quad:
        call    do_four_yuvi            # lowers %ecx by 4
        call    limit_pixels
        call    push_bgr32

        test    %ecx, %ecx              # end of line?
        jnz     .Li420_bgr32_quad
        decl    Height                  # yes; one line fewer to go
        jnz     .Li420_bgr32_row

.Li420_bgr32_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp
        pop     %ebp
        ret
00577
00578
00579
00580
00581
00582
# Packed YUYV -> 32 bit RGB (four bytes per pixel).
# Stack arguments (via the frame macros): Width, Height, Src2, Dst2.
# Returns without writing anything when test_param_2 rejects the arguments.
# The Height argument slot doubles as the line counter and ends at zero.
ENTRY(ccvt_yuyv_rgb32)
        push    %ebp
        mov     %esp, %ebp
        sub     $72, %esp               # room for PixelBuffer, Uptr, Vptr
        push    %ebx
        push    %esi
        push    %edi

        call    test_param_2            # sets %esi / %edi
        jc      .Lyuyv_rgb32_done

.Lyuyv_rgb32_row:
        mov     Width, %ecx             # pixels left on this line
.Lyuyv_rgb32_quad:
        call    do_four_yuyv            # lowers %ecx by 4
        call    limit_pixels
        call    push_rgb32

        test    %ecx, %ecx              # end of line?
        jnz     .Lyuyv_rgb32_quad

        decl    Height                  # yes; one line fewer to go
        jnz     .Lyuyv_rgb32_row

.Lyuyv_rgb32_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp
        pop     %ebp
        ret
00608
00609
# Packed YUYV -> 32 bit BGR (four bytes per pixel).
# Stack arguments (via the frame macros): Width, Height, Src2, Dst2.
# Returns without writing anything when test_param_2 rejects the arguments.
# The Height argument slot doubles as the line counter and ends at zero.
ENTRY(ccvt_yuyv_bgr32)
        push    %ebp
        mov     %esp, %ebp
        sub     $72, %esp               # room for PixelBuffer, Uptr, Vptr
        push    %ebx
        push    %esi
        push    %edi

        call    test_param_2            # sets %esi / %edi
        jc      .Lyuyv_bgr32_done

        # Two YUYV groups (eight bytes) become four output pixels per round

.Lyuyv_bgr32_row:
        mov     Width, %ecx             # pixels left on this line
.Lyuyv_bgr32_quad:
        call    do_four_yuyv            # lowers %ecx by 4
        call    limit_pixels
        call    push_bgr32

        test    %ecx, %ecx              # end of line?
        jnz     .Lyuyv_bgr32_quad

        decl    Height                  # yes; one line fewer to go
        jnz     .Lyuyv_bgr32_row

.Lyuyv_bgr32_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp
        pop     %ebp
        ret
00637
00638
00639
00640
00641
00642
# Planar 4:2:0 -> 32 bit RGB (four bytes per pixel).
# Stack arguments (via the frame macros): Width, Height, SrcY, SrcU,
# SrcV, Dst4. Returns without writing anything when test_param_31 rejects
# the arguments. The Height argument slot doubles as the line counter.
# test_param_31 leaves %esi at the Y plane, %edi at the destination and
# the Uptr / Vptr slots at the chroma planes, so nothing else needs
# setting up here (an unused Width * Height product and a second copy of
# the chroma pointers used to sit at this point).
ENTRY(ccvt_420p_rgb32)
        enter   $72, $0
        push    %ebx
        push    %esi
        push    %edi

        call    test_param_31
        jc      9f

0:      mov     Width, %ecx             # pixels left on this line
1:      call    do_four_yuvp            # lowers %ecx by 4, advances Uptr / Vptr
        call    limit_pixels
        call    push_rgb32

        cmp     $0, %ecx                # end of line?
        jnz     1b

        testl   $1, Height              # odd/even line
        jnz     8f

        # Even counter: the next line shares this chroma row, so step
        # both chroma pointers back by the Width / 2 bytes just consumed
        mov     Width, %eax
        shr     %eax
        sub     %eax, Uptr
        sub     %eax, Vptr

8:      decl    Height                  # one line fewer to go
        jnz     0b

9:      pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret
00683
00684
00685
00686
# Planar 4:2:0 -> 32 bit BGR (four bytes per pixel).
# Stack arguments (via the frame macros): Width, Height, SrcY, SrcU,
# SrcV, Dst4. Returns without writing anything when test_param_31 rejects
# the arguments. The Height argument slot doubles as the line counter.
# test_param_31 leaves %esi at the Y plane, %edi at the destination and
# the Uptr / Vptr slots at the chroma planes, so nothing else needs
# setting up here (an unused Width * Height product and a second copy of
# the chroma pointers used to sit at this point).
ENTRY(ccvt_420p_bgr32)
        enter   $72, $0
        push    %ebx
        push    %esi
        push    %edi

        call    test_param_31
        jc      9f

0:      mov     Width, %ecx             # pixels left on this line
1:      call    do_four_yuvp            # lowers %ecx by 4, advances Uptr / Vptr
        call    limit_pixels
        call    push_bgr32

        cmp     $0, %ecx                # end of line?
        jnz     1b

        testl   $1, Height              # odd/even line
        jnz     8f

        # Even counter: the next line shares this chroma row, so step
        # both chroma pointers back by the Width / 2 bytes just consumed
        mov     Width, %eax
        shr     %eax
        sub     %eax, Uptr
        sub     %eax, Vptr

8:      decl    Height                  # one line fewer to go
        jnz     0b

9:      pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret
00727
00728
00729
00730
00731
00732
00733
00734
00735
00736
00737
00738
00739
00740
# Packed 24 bit RGB -> planar 4:2:0.
# Stack arguments (via the frame macros): Width, Height, Src4, DstY,
# DstU, DstV. Returns without writing anything when test_param_13 rejects
# the arguments.
# Frame slots used besides the common ones:
#   -76(%ebp)  bytes per source line (3 * Width)
#   -80(%ebp)  line counter of the running pass
#   -84(%ebp)  column counter of the chroma passes
#   -88(%ebp)  factor for the first byte of a pixel (red)
#   -92(%ebp)  factor for the second byte (green)
#   -96(%ebp)  factor for the third byte (blue)
# All factors are scaled by 256.
ENTRY(ccvt_rgb24_420p)
        push    %ebp
        mov     %esp, %ebp
        sub     $96, %esp
        push    %ebx
        push    %esi
        push    %edi

        call    test_param_13           # %esi = source, %edi = Y plane
        jc      .Lrgb24p_done

        mov     Width, %eax
        lea     (%eax,%eax,2), %eax     # 3 * width = line increment
        mov     %eax, -76(%ebp)

        mov     Height, %eax
        mov     %eax, -80(%ebp)         # private copy of the height

        # Pass one: luma
        movl    $77, -88(%ebp)          # 0.299
        movl    $150, -92(%ebp)         # 0.587
        movl    $29, -96(%ebp)          # 0.114

.Lrgb24p_y_row:
        mov     Width, %ecx
.Lrgb24p_y_pixel:
        xor     %ebx, %ebx              # fresh accumulator
        call    rgb_multiply
        shr     $8, %ebx                # drop the scale; 77 + 150 + 29 = 256, so no clamp needed
        mov     %bl, %al
        stosb                           # into the Y plane

        dec     %ecx                    # end of line?
        jnz     .Lrgb24p_y_pixel
        decl    -80(%ebp)               # end of image?
        jnz     .Lrgb24p_y_row

        # Passes two and three: chroma. The same loop runs twice with
        # different factors; the source pointer hops between two lines.
        movl    $-43, -88(%ebp)         # -0.1687
        movl    $-85, -92(%ebp)         # -0.3313
        movl    $128, -96(%ebp)         #  0.5
        mov     DstU, %edi

.Lrgb24p_c_pass:
        mov     Src4, %esi              # rewind the source

        mov     Height, %eax
        shr     $1, %eax                # one chroma row per two lines
        mov     %eax, -80(%ebp)

.Lrgb24p_c_row:
        mov     Width, %eax
        shr     $1, %eax                # one chroma sample per two columns
        mov     %eax, -84(%ebp)

.Lrgb24p_c_cell:
        xor     %ebx, %ebx              # fresh accumulator
        mov     $4, %ecx                # a cell is 2 x 2 pixels

.Lrgb24p_c_pixel:
        call    rgb_multiply

        dec     %ecx
        jz      .Lrgb24p_c_store        # all four pixels summed
        cmp     $2, %ecx                # two done: continue on the line below
        jne     .Lrgb24p_c_pixel
        sub     $6, %esi                # back to the start of the cell
        add     -76(%ebp), %esi         # ... one line down
        jmp     .Lrgb24p_c_pixel

.Lrgb24p_c_store:
        sub     -76(%ebp), %esi         # back up to the upper line, next cell

        add     $0x20000, %ebx          # +128 offset, scaled by 4 * 256
        shr     $10, %ebx               # divide by 4 * 256
        mov     %bl, %al
        stosb

        decl    -84(%ebp)               # end of line?
        jnz     .Lrgb24p_c_cell
        add     -76(%ebp), %esi         # skip the lower line just consumed
        decl    -80(%ebp)               # end of image?
        jnz     .Lrgb24p_c_row

        # The first-byte factor tells the passes apart: 128 means V is done
        cmpl    $128, -88(%ebp)
        je      .Lrgb24p_done

        movl    $128, -88(%ebp)         #  0.5
        movl    $-107, -92(%ebp)        # -0.4187
        movl    $-21, -96(%ebp)         # -0.0813
        mov     DstV, %edi
        jmp     .Lrgb24p_c_pass

.Lrgb24p_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp
        pop     %ebp
        ret
00849
00850
00851
00852
# Packed 24 bit BGR -> planar 4:2:0.
# Stack arguments (via the frame macros): Width, Height, Src4, DstY,
# DstU, DstV. Returns without writing anything when test_param_13 rejects
# the arguments.
# Frame slots used besides the common ones:
#   -76(%ebp)  bytes per source line (3 * Width)
#   -80(%ebp)  line counter of the running pass
#   -84(%ebp)  column counter of the chroma passes
#   -88(%ebp)  factor for the first byte of a pixel (blue)
#   -92(%ebp)  factor for the second byte (green)
#   -96(%ebp)  factor for the third byte (red)
# All factors are scaled by 256 and mirror those of ccvt_rgb24_420p with
# the first and third entries swapped.
ENTRY(ccvt_bgr24_420p)
        enter   $96, $0                 # 24 bytes on top of the common 72
        push    %ebx
        push    %esi
        push    %edi

        call    test_param_13           # %esi = source, %edi = Y plane
        jc      9f

        mov     Width, %eax
        shl     %eax
        add     Width, %eax             # 3 * width = line increment
        mov     %eax, -76(%ebp)

        mov     Height, %eax
        mov     %eax, -80(%ebp)         # copy height into stackframe

        # Pass one: Y values. Set factors
        movl    $29 , -88(%ebp)         # 0.114
        movl    $150, -92(%ebp)         # 0.587
        movl    $77 , -96(%ebp)         # 0.299

0:      mov     Width, %ecx             # width
1:      xor     %ebx, %ebx              # fresh accumulator
        call    rgb_multiply
        shr     $8, %ebx                # divide by 256 (no need for limitor, since 77 + 150 + 29 = 256)
        mov     %bl, %al
        stosb                           # store it into Y buffer

        dec     %ecx                    # end of line?
        jnz     1b
        decl    -80(%ebp)               # end of image?
        jnz     0b

        # Okay, now the U/V planes...
        # The following code is passed twice, with different factors
        # Note that the %esi pointer jumps around quite a bit

        # Factors for U. The blue factor has to be 128 (0.5) so that the
        # three factors sum to zero; any other value tints grey input.
        movl    $128, -88(%ebp)         #  0.5
        movl    $-85, -92(%ebp)         # -0.3313
        movl    $-43, -96(%ebp)         # -0.1687
        mov     DstU, %edi              # Set %edi register now

7:      mov     Src4, %esi              # Rewind source pointer

        mov     Height, %eax            # height
        shr     %eax                    # / 2
        mov     %eax, -80(%ebp)         # copy

2:      mov     Width, %eax             # width
        shr     %eax                    # / 2
        mov     %eax, -84(%ebp)         # copy

3:      xor     %ebx, %ebx              # fresh accumulator
        mov     $4, %ecx                # average over 4 pixels

4:      call    rgb_multiply

        dec     %ecx
        jz      5f                      # done?
        cmp     $2, %ecx                # two done: move %esi to the line below
        jne     4b
        sub     $6, %esi                # backup to where we started
        add     -76(%ebp), %esi         # add line increment
        jmp     4b

5:      # okay, 4 pixels done...
        sub     -76(%ebp), %esi         # Get %esi back to its proper place

        add     $0x20000, %ebx          # +128 offset, scaled by 4 * 256
        shr     $10, %ebx               # Divide by 4 * 256
        mov     %bl, %al
        stosb                           # store it!

        decl    -84(%ebp)               # end of line?
        jnz     3b
        add     -76(%ebp), %esi         # %esi to next line (actually, 2 lines further)
        decl    -80(%ebp)               # end of image?
        jnz     2b

        # The first-byte factor tells the passes apart: -21 means V is done
        cmpl    $-21, -88(%ebp)
        je      9f                      # Done!
        # Set factors for V pass
        movl    $-21 , -88(%ebp)        # -0.0813
        movl    $-107, -92(%ebp)        # -0.4187
        movl    $128 , -96(%ebp)        #  0.5
        mov     DstV, %edi              # %edi to V buffer
        jmp     7b                      # "Do it to me one more time..."

9:      pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret
00953
00954
00955
00956
# Multiply one 3-byte pixel with the three factors kept in the frame of
# the caller and add the products to the accumulator.
# Only the low 32 bits of every product are used, so the unsigned mul
# also works for the negative factors.
# In:       %esi = pixel, %ebx = accumulator
#           -88(%ebp), -92(%ebp), -96(%ebp) = factors for byte 0, 1, 2
# Out:      %ebx = accumulator plus the three products, %esi advanced by 3
# Destroys: %eax, %edx, flags
rgb_multiply:
        lodsb                           # byte 0 of the pixel
        movzbl  %al, %eax
        mull    -88(%ebp)
        add     %eax, %ebx

        lodsb                           # byte 1
        movzbl  %al, %eax
        mull    -92(%ebp)
        add     %eax, %ebx

        lodsb                           # byte 2
        movzbl  %al, %eax
        mull    -96(%ebp)
        add     %eax, %ebx              # ebx now contains sum
        ret
00976
00977
00978
00979
00980
00981
00982
00983
# Interleaved 4:2:0 -> planar 4:2:0.
# Stack arguments (via the frame macros): Width, Height, Src4, DstY,
# DstU, DstV. Returns without writing anything when test_param_13 rejects
# the arguments.
# The source is walked three times: once copying the Y bytes (four at a
# time), once collecting U from the even lines and once collecting V from
# the odd lines.
# Frame slot -76(%ebp) holds Width / 4, the number of groups per line.
ENTRY(ccvt_420i_420p)
        push    %ebp
        mov     %esp, %ebp
        sub     $76, %esp
        push    %ebx
        push    %esi
        push    %edi

        call    test_param_13           # %esi = source, %edi = Y plane
        jc      .Li420p_done

        mov     Width, %eax
        shr     $2, %eax                # width / 4
        mov     %eax, -76(%ebp)

        # Y plane
        mov     Height, %edx            # line counter
.Li420p_y_row:
        mov     -76(%ebp), %ecx
.Li420p_y_group:
        lodsl                           # four Y bytes in ...
        stosl                           # ... four Y bytes out
        add     $2, %esi                # skip the chroma pair
        dec     %ecx
        jnz     .Li420p_y_group
        dec     %edx
        jnz     .Li420p_y_row

        # U plane
        mov     Src4, %esi              # rewind the source
        add     $4, %esi                # first chroma pair of line 0
        mov     DstU, %edi
        mov     Height, %edx
        shr     $1, %edx                # height / 2
        mov     Width, %ebx
        lea     (%ebx,%ebx,2), %ebx
        shr     $1, %ebx                # Width * 1.5 = bytes per source line

.Li420p_u_row:
        mov     -76(%ebp), %ecx
.Li420p_u_group:
        lodsw                           # chroma pair
        stosw
        add     $4, %esi                # skip the Y bytes
        dec     %ecx
        jnz     .Li420p_u_group
        add     %ebx, %esi              # skip the odd line
        dec     %edx
        jnz     .Li420p_u_row

        # V plane
        mov     Src4, %esi              # rewind the source
        add     $4, %esi
        add     %ebx, %esi              # first chroma pair of line 1
        mov     DstV, %edi
        mov     Height, %edx
        shr     $1, %edx                # height / 2

.Li420p_v_row:
        mov     -76(%ebp), %ecx
.Li420p_v_group:
        lodsw                           # chroma pair
        stosw
        add     $4, %esi                # skip the Y bytes
        dec     %ecx
        jnz     .Li420p_v_group
        add     %ebx, %esi              # skip the even line
        dec     %edx
        jnz     .Li420p_v_row

.Li420p_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp
        pop     %ebp
        ret
01056
01057
01058
01059
# Interleaved 4:2:0 -> packed YUYV.
# Stack arguments (via the frame macros): Width, Height, Src2, Dst2.
# Returns without writing anything when test_param_2 rejects the arguments.
# The Height argument slot doubles as the line counter and ends at zero.
# Every input group (four Y bytes plus a chroma pair) becomes eight output
# bytes: Y0 U0 Y1 V0 Y2 U1 Y3 V1. The chroma plane that is not stored on
# the current line is read from the neighbouring line.
# Register pictures in the comments list bytes from high to low.
# Frame slots: -76(%ebp) = Width / 4 (groups per line)
#              -80(%ebp) = Width * 1.5 (bytes per source line)
ENTRY(ccvt_420i_yuyv)
        push    %ebp
        mov     %esp, %ebp
        sub     $80, %esp
        push    %ebx
        push    %esi
        push    %edi

        call    test_param_2            # sets %esi / %edi
        jc      .Li420_yuyv_done

        mov     Width, %ecx
        shr     $2, %ecx                # groups per line
        mov     %ecx, -76(%ebp)

        mov     Width, %ebx
        lea     (%ebx,%ebx,2), %ebx
        shr     $1, %ebx                # bytes per source line
        mov     %ebx, -80(%ebp)

.Li420_yuyv_row:
        mov     -76(%ebp), %ecx

.Li420_yuyv_group:
        lodsl                           # y3y2y1y0
        mov     -80(%ebp), %ebx
        testl   $1, Height              # even or odd line?
        jnz     .Li420_yuyv_odd

        # Even line: U is right here, V on the line below
        mov     (%esi,%ebx), %dx        # V pair
        shl     $16, %edx               # ... into the high word
        mov     (%esi), %dx             # U pair
        jmp     .Li420_yuyv_pack

.Li420_yuyv_odd:
        # Odd line: V is right here, U on the line above
        neg     %ebx
        mov     (%esi), %dx             # V pair
        shl     $16, %edx               # ... into the high word
        mov     (%esi,%ebx), %dx        # U pair

.Li420_yuyv_pack:
        add     $2, %esi                # past the chroma pair

        # eax = y3y2y1y0, edx = v1v0u1u0, ebx is free again
        push    %eax

        movzbl  %al, %ebx               # ______y0
        and     $0xFF00, %eax           # ____y1__
        shl     $8, %eax                # __y1____
        or      %ebx, %eax              # __y1__y0
        mov     %edx, %ebx              # v1v0u1u0
        shl     $8, %ebx                # v0u1u0__
        and     $0xff00ff00, %ebx       # v0__u0__
        or      %ebx, %eax              # v0y1u0y0
        stosl

        pop     %eax                    # y3y2y1y0
        shr     $8, %eax                # __y3y2y1
        shr     $8, %ax                 # __y3__y2
        and     $0xff00ff00, %edx       # v1__u1__
        or      %edx, %eax              # v1y3u1y2
        stosl

        dec     %ecx
        jnz     .Li420_yuyv_group

        decl    Height                  # one line fewer to go
        jnz     .Li420_yuyv_row

.Li420_yuyv_done:
        pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp
        pop     %ebp
        ret